From 01d35eac55aac310fc1b24d3d19b06649f775e70 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 09:35:54 -0700 Subject: [PATCH 01/18] Add `pinned_host_vector` --- cpp/include/cudf/io/detail/utils.hpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/detail/utils.hpp b/cpp/include/cudf/io/detail/utils.hpp index adb7078d96d..666deb6776e 100644 --- a/cpp/include/cudf/io/detail/utils.hpp +++ b/cpp/include/cudf/io/detail/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,10 @@ #pragma once +#include + +#include + namespace cudf { namespace io { namespace detail { @@ -23,6 +27,13 @@ namespace detail { * @brief Whether writer writes in chunks or all at once */ enum class SingleWriteMode : bool { YES, NO }; + +/** + * @brief Helper for pinned host memory + */ +template +using pinned_host_vector = thrust::host_vector>(); + } // namespace detail } // namespace io } // namespace cudf From e2393ed772e0989344ddf241c6d57da167eb66a3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 09:39:52 -0700 Subject: [PATCH 02/18] Revert "Add `pinned_host_vector`" This reverts commit 01d35eac55aac310fc1b24d3d19b06649f775e70. --- cpp/include/cudf/io/detail/utils.hpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/cpp/include/cudf/io/detail/utils.hpp b/cpp/include/cudf/io/detail/utils.hpp index 666deb6776e..adb7078d96d 100644 --- a/cpp/include/cudf/io/detail/utils.hpp +++ b/cpp/include/cudf/io/detail/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,6 @@ #pragma once -#include - -#include - namespace cudf { namespace io { namespace detail { @@ -27,13 +23,6 @@ namespace detail { * @brief Whether writer writes in chunks or all at once */ enum class SingleWriteMode : bool { YES, NO }; - -/** - * @brief Helper for pinned host memory - */ -template -using pinned_host_vector = thrust::host_vector>(); - } // namespace detail } // namespace io } // namespace cudf From 7aa1d75ac826ba9f9e3279f49358ceeed02a3b65 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 09:42:16 -0700 Subject: [PATCH 03/18] Move `pinned_host_vector` into `pinned_allocator.hpp` --- .../cudf/detail/utilities/pinned_allocator.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/pinned_allocator.hpp b/cpp/include/cudf/detail/utilities/pinned_allocator.hpp index 84abf7c014f..283511d7491 100644 --- a/cpp/include/cudf/detail/utilities/pinned_allocator.hpp +++ b/cpp/include/cudf/detail/utilities/pinned_allocator.hpp @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 NVIDIA Corporation + * Copyright 2008-2023 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,8 @@ #include +#include + namespace cudf::detail { /*! \p pinned_allocator is a CUDA-specific host memory allocator @@ -199,4 +201,11 @@ class pinned_allocator { return !operator==(x); } }; + +/** + * @brief Helper for pinned host memory + */ +template +using pinned_host_vector = thrust::host_vector>(); + } // namespace cudf::detail From ca18ce951631471665852b3251381211f5da9069 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 09:53:05 -0700 Subject: [PATCH 04/18] Switch to use `pinned_host_vector` --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/benchmarks/io/text/multibyte_split.cpp | 5 ++--- ...ned_allocator.hpp => pinned_host_vector.hpp} | 2 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 13 +++++-------- cpp/src/io/text/data_chunk_source_factories.cpp | 17 ++++++----------- cpp/src/io/utilities/hostdevice_vector.hpp | 7 +++---- 6 files changed, 18 insertions(+), 28 deletions(-) rename cpp/include/cudf/detail/utilities/{pinned_allocator.hpp => pinned_host_vector.hpp} (99%) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 2bb571f858d..94b244b94d5 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -139,7 +139,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp - test -f $PREFIX/include/cudf/detail/utilities/logger.hpp - - test -f $PREFIX/include/cudf/detail/utilities/pinned_allocator.hpp + - test -f $PREFIX/include/cudf/detail/utilities/pinned_host_vector.hpp - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp - test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp - test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 41b5ddb567e..a697c98a320 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -33,7 +33,6 @@ #include #include -#include #include #include @@ -136,7 +135,7 @@ static void bench_multibyte_split(nvbench::state& state, std::unique_ptr datasource; auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); auto host_input = std::vector{}; - auto host_pinned_input = thrust::host_vector>{}; + auto host_pinned_input = cudf::detail::pinned_host_vector{}; if (source_type != data_chunk_source_type::device && source_type != data_chunk_source_type::host_pinned) { diff --git a/cpp/include/cudf/detail/utilities/pinned_allocator.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp similarity index 99% rename from cpp/include/cudf/detail/utilities/pinned_allocator.hpp rename to cpp/include/cudf/detail/utilities/pinned_host_vector.hpp index 283511d7491..461e76bc218 100644 --- a/cpp/include/cudf/detail/utilities/pinned_allocator.hpp +++ b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp @@ -203,7 +203,7 @@ class pinned_allocator { }; /** - * @brief Helper for pinned host memory + * @brief A vector class with pinned host memory allocator */ template using pinned_host_vector = thrust::host_vector>(); diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 32678191cb1..715f70605df 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,7 +19,7 @@ #include "io/utilities/config_utils.hpp" #include -#include +#include #include #include #include @@ -65,10 +65,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - using pinned_host_vector = thrust::host_vector>; - - template - static void copy_to_device(const pinned_host_vector& host, + static void copy_to_device(cudf::detail::pinned_host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -84,9 +81,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - pinned_host_vector h_compressed_blocks; - pinned_host_vector h_compressed_offsets; - pinned_host_vector h_decompressed_offsets; + cudf::detail::pinned_host_vector h_compressed_blocks; + cudf::detail::pinned_host_vector h_compressed_offsets; + cudf::detail::pinned_host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index b6c88c1346f..1ff1a4f8ebf 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -17,7 +17,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -30,16 +30,16 @@ namespace cudf::io::text { namespace { +struct host_ticket { + cudaEvent_t event; + cudf::detail::pinned_host_vector buffer; +}; + /** * @brief A reader which produces owning chunks of device memory which contain a copy of the data * from an istream. */ class datasource_chunk_reader : public data_chunk_reader { - struct host_ticket { - cudaEvent_t event; - thrust::host_vector> buffer; - }; - constexpr static int num_tickets = 2; public: @@ -114,11 +114,6 @@ class datasource_chunk_reader : public data_chunk_reader { * from an istream. */ class istream_data_chunk_reader : public data_chunk_reader { - struct host_ticket { - cudaEvent_t event; - thrust::host_vector> buffer; - }; - constexpr static int num_tickets = 2; public: diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index ff8502b1dcd..566132f8463 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -19,7 +19,7 @@ #include "config_utils.hpp" #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -67,7 +67,7 @@ class hostdevice_vector { if (hostdevice_vector_uses_pageable_buffer()) { h_data_owner = thrust::host_vector(); } else { - h_data_owner = thrust::host_vector>(); + h_data_owner = cudf::detail::pinned_host_vector(); } std::visit( @@ -177,8 +177,7 @@ class hostdevice_vector { } private: - std::variant, thrust::host_vector>> - h_data_owner; + std::variant, cudf::detail::pinned_host_vector> h_data_owner; T* host_data = nullptr; size_t current_size = 0; rmm::device_uvector d_data; From d7070e73a5f51de900f03b17566ed8b0c38e412f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 10:00:06 -0700 Subject: [PATCH 05/18] Adopt `pinned_host_vector` --- cpp/src/io/orc/writer_impl.cu | 58 +++++++++---------------------- cpp/src/io/orc/writer_impl.hpp | 2 +- cpp/src/io/parquet/writer_impl.cu | 22 +++--------- 3 files changed, 23 insertions(+), 59 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index f1eb52f63f1..d3f0d82927e 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -79,11 +80,6 @@ struct row_group_index_info { }; namespace { -/** - * @brief Helper for pinned host memory - */ -template -using pinned_buffer = std::unique_ptr; /** * @brief Translates ORC compression to nvCOMP compression @@ -2196,28 +2192,17 @@ std::unique_ptr make_table_meta(table_view const& input) * @param stream CUDA stream used for device memory operations and kernel launches * @return A tuple of the intermediate results containing the processed data */ -std::tuple, - hostdevice_2dvector, - encoded_data, - file_segmentation, - hostdevice_2dvector, - std::vector, - orc_table_view, - rmm::device_buffer, - intermediate_statistics, - pinned_buffer> -convert_table_to_orc_data(table_view const& input, - table_input_metadata const& table_meta, - stripe_size_limits max_stripe_size, - size_type row_index_stride, - bool enable_dictionary, - CompressionKind compression_kind, - size_t compression_blocksize, - statistics_freq stats_freq, - bool single_write_mode, - data_sink const& out_sink, - rmm::cuda_stream_view stream) +auto convert_table_to_orc_data(table_view const& input, + table_input_metadata const& table_meta, + stripe_size_limits max_stripe_size, + size_type row_index_stride, + bool enable_dictionary, + CompressionKind compression_kind, + size_t compression_blocksize, + statistics_freq stats_freq, + bool single_write_mode, + data_sink const& out_sink, + rmm::cuda_stream_view stream) { auto const input_tview = table_device_view::create(input, stream); @@ -2298,7 +2283,7 @@ convert_table_to_orc_data(table_view const& input, std::move(orc_table), rmm::device_buffer{}, // compressed_data intermediate_statistics{stream}, - pinned_buffer{nullptr, cudaFreeHost}}; + cudf::detail::pinned_host_vector{}}; } // Allocate intermediate output stream buffer @@ -2333,16 +2318,7 @@ convert_table_to_orc_data(table_view const& input, max_stream_size = std::max(max_stream_size, stream_size); } - if (all_device_write) { - return pinned_buffer{nullptr, cudaFreeHost}; - } else { - return pinned_buffer{[](size_t size) { - uint8_t* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - return ptr; - }(max_stream_size), - cudaFreeHost}; - } + return cudf::detail::pinned_host_vector{all_device_write ? 0 : max_stream_size}; }(); // Compress the data streams @@ -2489,7 +2465,7 @@ void writer::impl::write(table_view const& input) orc_table, compressed_data, intermediate_stats, - stream_output.get()); + stream_output); // Update data into the footer. This needs to be called even when num_rows==0. add_table_to_footer_data(orc_table, stripes); @@ -2504,7 +2480,7 @@ void writer::impl::write_orc_data_to_sink(orc_streams& streams, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, - uint8_t* stream_output) + host_span stream_output) { if (orc_table.num_rows() == 0) { return; } @@ -2544,7 +2520,7 @@ void writer::impl::write_orc_data_to_sink(orc_streams& streams, strm_desc, enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], static_cast(compressed_data.data()), - stream_output, + stream_output.data(), &stripe, &streams, _compression_kind, diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f003de8087e..96c6593556f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -313,7 +313,7 @@ class writer::impl { orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, - uint8_t* stream_output); + host_span stream_output); /** * @brief Add the processed table data into the internal file footer. diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 79581c0d21c..e2ead6f9985 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -67,11 +68,6 @@ using namespace cudf::io::parquet; using namespace cudf::io; namespace { -/** - * @brief Helper for pinned host memory - */ -template -using pinned_buffer = std::unique_ptr; /** * @brief Function that translates GDF compression to parquet compression @@ -1835,7 +1831,7 @@ void writer::impl::write(table_view const& table, std::vector co num_stats_bfr); } - pinned_buffer host_bfr{nullptr, cudaFreeHost}; + cudf::detail::pinned_host_vector host_bfr(max_chunk_bfr_size); // Encode row groups in batches for (auto b = 0, r = 0; b < static_cast(batch_list.size()); b++) { @@ -1889,25 +1885,17 @@ void writer::impl::write(table_view const& table, std::vector co _stream.synchronize(); } } else { - if (!host_bfr) { - host_bfr = pinned_buffer{[](size_t size) { - uint8_t* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - return ptr; - }(max_chunk_bfr_size), - cudaFreeHost}; - } // copy the full data - CUDF_CUDA_TRY(cudaMemcpyAsync(host_bfr.get(), + CUDF_CUDA_TRY(cudaMemcpyAsync(host_bfr.data(), dev_bfr, ck.ck_stat_size + ck.compressed_size, cudaMemcpyDefault, _stream.value())); _stream.synchronize(); - _out_sink[p]->host_write(host_bfr.get() + ck.ck_stat_size, ck.compressed_size); + _out_sink[p]->host_write(host_bfr.data() + ck.ck_stat_size, ck.compressed_size); if (ck.ck_stat_size != 0) { column_chunk_meta.statistics_blob.resize(ck.ck_stat_size); - memcpy(column_chunk_meta.statistics_blob.data(), host_bfr.get(), ck.ck_stat_size); + memcpy(column_chunk_meta.statistics_blob.data(), host_bfr.data(), ck.ck_stat_size); } } row_group.total_byte_size += ck.compressed_size; From 7358c7891071e80ce4b135f1628590c582800022 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 10:06:22 -0700 Subject: [PATCH 06/18] Fix bug in type alias --- cpp/include/cudf/detail/utilities/pinned_host_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp index 461e76bc218..83f061e9407 100644 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp @@ -206,6 +206,6 @@ class pinned_allocator { * @brief A vector class with pinned host memory allocator */ template -using pinned_host_vector = thrust::host_vector>(); +using pinned_host_vector = thrust::host_vector>; } // namespace cudf::detail From a266de6a0fe2121cb8b39a213f0cefe3ef3582ce Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 10:20:52 -0700 Subject: [PATCH 07/18] Fix bug and rename variable --- cpp/src/io/orc/writer_impl.cu | 56 +++++++++++++++++----------------- cpp/src/io/orc/writer_impl.hpp | 4 +-- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index d3f0d82927e..17312aa19bb 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2273,17 +2273,17 @@ auto convert_table_to_orc_data(table_view const& input, auto stripes = gather_stripes(num_index_streams, segmentation, &enc_data, &strm_descs, stream); if (num_rows == 0) { - return {std::move(streams), - hostdevice_vector{}, // comp_results - std::move(strm_descs), - std::move(enc_data), - std::move(segmentation), - std::move(stripe_dict), - std::move(stripes), - std::move(orc_table), - rmm::device_buffer{}, // compressed_data - intermediate_statistics{stream}, - cudf::detail::pinned_host_vector{}}; + return std::tuple{std::move(streams), + hostdevice_vector{}, // comp_results + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripe_dict), + std::move(stripes), + std::move(orc_table), + rmm::device_buffer{}, // compressed_data + intermediate_statistics{stream}, + cudf::detail::pinned_host_vector()}; } // Allocate intermediate output stream buffer @@ -2297,7 +2297,7 @@ auto convert_table_to_orc_data(table_view const& input, auto const padded_block_header_size = util::round_up_unsafe(block_header_size, compressed_block_align); - auto stream_output = [&]() { + auto bounce_buffer = [&]() { size_t max_stream_size = 0; bool all_device_write = true; @@ -2318,7 +2318,7 @@ auto convert_table_to_orc_data(table_view const& input, max_stream_size = std::max(max_stream_size, stream_size); } - return cudf::detail::pinned_host_vector{all_device_write ? 0 : max_stream_size}; + return cudf::detail::pinned_host_vector(all_device_write ? 0 : max_stream_size); }(); // Compress the data streams @@ -2350,17 +2350,17 @@ auto convert_table_to_orc_data(table_view const& input, auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); - return {std::move(streams), - std::move(comp_results), - std::move(strm_descs), - std::move(enc_data), - std::move(segmentation), - std::move(stripe_dict), - std::move(stripes), - std::move(orc_table), - std::move(compressed_data), - std::move(intermediate_stats), - std::move(stream_output)}; + return std::tuple{std::move(streams), + std::move(comp_results), + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripe_dict), + std::move(stripes), + std::move(orc_table), + std::move(compressed_data), + std::move(intermediate_stats), + std::move(bounce_buffer)}; } } // namespace @@ -2434,7 +2434,7 @@ void writer::impl::write(table_view const& input) orc_table, compressed_data, intermediate_stats, - stream_output] = [&] { + bounce_buffer] = [&] { try { return convert_table_to_orc_data(input, *_table_meta, @@ -2465,7 +2465,7 @@ void writer::impl::write(table_view const& input) orc_table, compressed_data, intermediate_stats, - stream_output); + bounce_buffer); // Update data into the footer. This needs to be called even when num_rows==0. add_table_to_footer_data(orc_table, stripes); @@ -2480,7 +2480,7 @@ void writer::impl::write_orc_data_to_sink(orc_streams& streams, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, - host_span stream_output) + host_span bounce_buffer) { if (orc_table.num_rows() == 0) { return; } @@ -2520,7 +2520,7 @@ void writer::impl::write_orc_data_to_sink(orc_streams& streams, strm_desc, enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], static_cast(compressed_data.data()), - stream_output.data(), + bounce_buffer.data(), &stripe, &streams, _compression_kind, diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 96c6593556f..533e60f0d0c 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -302,7 +302,7 @@ class writer::impl { * @param orc_table Non-owning view of a cuDF table that includes ORC-related information * @param compressed_data Compressed stream data * @param intermediate_stats Statistics data stored between calls to write - * @param stream_output Temporary host output buffer + * @param bounce_buffer Temporary host output buffer */ void write_orc_data_to_sink(orc_streams& streams, hostdevice_vector const& comp_results, @@ -313,7 +313,7 @@ class writer::impl { orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, - host_span stream_output); + host_span bounce_buffer); /** * @brief Add the processed table data into the internal file footer. From 4d436870c8729d1100d00ac367eda561d83dd1d2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 10:31:34 -0700 Subject: [PATCH 08/18] Switch to use `SingleWriteMode` --- cpp/src/io/orc/writer_impl.cu | 14 +++++++------- cpp/src/io/orc/writer_impl.hpp | 8 ++++---- cpp/src/io/parquet/writer_impl.cu | 10 +++++----- cpp/src/io/parquet/writer_impl.hpp | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 17312aa19bb..5877b61d32a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -375,11 +375,11 @@ __global__ void copy_string_data(char* string_pool, } // namespace void persisted_statistics::persist(int num_table_rows, - bool single_write_mode, + SingleWriteMode single_write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream) { - if (not single_write_mode) { + if (single_write_mode == SingleWriteMode::YES) { // persist the strings in the chunks into a string pool and update pointers auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); // min offset and max offset + 1 for total size @@ -666,7 +666,7 @@ orc_streams create_streams(host_span columns, std::map const& decimal_column_sizes, bool enable_dictionary, CompressionKind compression_kind, - bool single_write_mode) + SingleWriteMode single_write_mode) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? @@ -681,7 +681,7 @@ orc_streams create_streams(host_span columns, for (auto& column : columns) { auto const is_nullable = [&]() -> bool { - if (single_write_mode) { + if (single_write_mode == SingleWriteMode::YES) { return column.nullable(); } else { // For chunked write, when not provided nullability, we assume the worst case scenario @@ -2200,7 +2200,7 @@ auto convert_table_to_orc_data(table_view const& input, CompressionKind compression_kind, size_t compression_blocksize, statistics_freq stats_freq, - bool single_write_mode, + SingleWriteMode single_write_mode, data_sink const& out_sink, rmm::cuda_stream_view stream) { @@ -2375,7 +2375,7 @@ writer::impl::impl(std::unique_ptr sink, _compression_kind(to_orc_compression(options.get_compression())), _compression_blocksize(compression_block_size(_compression_kind)), _stats_freq(options.get_statistics_freq()), - _single_write_mode(mode == SingleWriteMode::YES), + _single_write_mode(mode), _kv_meta(options.get_key_value_metadata()), _out_sink(std::move(sink)) { @@ -2395,7 +2395,7 @@ writer::impl::impl(std::unique_ptr sink, _compression_kind(to_orc_compression(options.get_compression())), _compression_blocksize(compression_block_size(_compression_kind)), _stats_freq(options.get_statistics_freq()), - _single_write_mode(mode == SingleWriteMode::YES), + _single_write_mode(mode), _kv_meta(options.get_key_value_metadata()), _out_sink(std::move(sink)) { diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 533e60f0d0c..3d4e8ab7bf3 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -209,7 +209,7 @@ struct persisted_statistics { } void persist(int num_table_rows, - bool single_write_mode, + SingleWriteMode single_write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream); @@ -334,9 +334,9 @@ class writer::impl { CompressionKind const _compression_kind; size_t const _compression_blocksize; statistics_freq const _stats_freq; - bool const _single_write_mode; // Special parameter only used by `write()` to indicate that - // we are guaranteeing a single table write. This enables some - // internal optimizations. + SingleWriteMode const _single_write_mode; // Special parameter only used by `write()` to indicate + // that we are guaranteeing a single table write. This + // enables some internal optimizations. std::map const _kv_meta; // Optional user metadata. std::unique_ptr const _out_sink; diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index e2ead6f9985..b80016ebc48 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -506,7 +506,7 @@ struct leaf_schema_fn { inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, - bool single_write_mode) + SingleWriteMode single_write_mode) { if (col_meta.is_nullability_defined()) { CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), @@ -516,7 +516,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, } // For chunked write, when not provided nullability, we assume the worst case scenario // that all columns are nullable. - return not single_write_mode or col->nullable(); + return single_write_mode == SingleWriteMode::NO or col->nullable(); } /** @@ -528,7 +528,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, std::vector construct_schema_tree( cudf::detail::LinkedColVector const& linked_columns, table_input_metadata& metadata, - bool single_write_mode, + SingleWriteMode single_write_mode, bool int96_timestamps) { std::vector schema; @@ -1330,7 +1330,7 @@ writer::impl::impl(std::vector> sinks, _int96_timestamps(options.is_enabled_int96_timestamps()), _column_index_truncate_length(options.get_column_index_truncate_length()), _kv_meta(options.get_key_value_metadata()), - _single_write_mode(mode == SingleWriteMode::YES), + _single_write_mode(mode), _out_sink(std::move(sinks)) { if (options.get_metadata()) { @@ -1356,7 +1356,7 @@ writer::impl::impl(std::vector> sinks, _int96_timestamps(options.is_enabled_int96_timestamps()), _column_index_truncate_length(options.get_column_index_truncate_length()), _kv_meta(options.get_key_value_metadata()), - _single_write_mode(mode == SingleWriteMode::YES), + _single_write_mode(mode), _out_sink(std::move(sinks)) { if (options.get_metadata()) { diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index 34ad95f3ee4..d4fdb9a8813 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -229,9 +229,9 @@ class writer::impl { bool const _int96_timestamps; int32_t const _column_index_truncate_length; std::vector> const _kv_meta; // Optional user metadata. - bool const _single_write_mode = true; // Special parameter only used by `write()` to indicate - // that we are guaranteeing a single table write. - // This enables some internal optimizations. + SingleWriteMode const _single_write_mode; // Special parameter only used by `write()` to indicate + // that we are guaranteeing a single table write. + // This enables some internal optimizations. std::vector> const _out_sink; // Internal states, filled during `write()` and written to sink during `write` and `close()`. From d33ad7d4c6700baf001f29ccfe05316a9bbbaf71 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:08:18 -0600 Subject: [PATCH 09/18] Update cpp/src/io/orc/writer_impl.cu Co-authored-by: Yunsong Wang --- cpp/src/io/orc/writer_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 5877b61d32a..333fa141aea 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -379,7 +379,7 @@ void persisted_statistics::persist(int num_table_rows, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream) { - if (single_write_mode == SingleWriteMode::YES) { + if (single_write_mode == SingleWriteMode::NO) { // persist the strings in the chunks into a string pool and update pointers auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); // min offset and max offset + 1 for total size From 155f99f1c022ac7da542eac21ca30f1b86651601 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 11:13:44 -0700 Subject: [PATCH 10/18] Rename variable --- cpp/src/io/orc/writer_impl.cu | 14 +++++++------- cpp/src/io/orc/writer_impl.hpp | 2 +- cpp/src/io/parquet/writer_impl.cu | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 333fa141aea..525bc5701ca 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -375,11 +375,11 @@ __global__ void copy_string_data(char* string_pool, } // namespace void persisted_statistics::persist(int num_table_rows, - SingleWriteMode single_write_mode, + SingleWriteMode write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream) { - if (single_write_mode == SingleWriteMode::NO) { + if (write_mode == SingleWriteMode::NO) { // persist the strings in the chunks into a string pool and update pointers auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); // min offset and max offset + 1 for total size @@ -666,7 +666,7 @@ orc_streams create_streams(host_span columns, std::map const& decimal_column_sizes, bool enable_dictionary, CompressionKind compression_kind, - SingleWriteMode single_write_mode) + SingleWriteMode write_mode) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? @@ -681,7 +681,7 @@ orc_streams create_streams(host_span columns, for (auto& column : columns) { auto const is_nullable = [&]() -> bool { - if (single_write_mode == SingleWriteMode::YES) { + if (write_mode == SingleWriteMode::YES) { return column.nullable(); } else { // For chunked write, when not provided nullability, we assume the worst case scenario @@ -2187,7 +2187,7 @@ std::unique_ptr make_table_meta(table_view const& input) * @param compression_kind The compression kind * @param compression_blocksize The block size used for compression * @param stats_freq Column statistics granularity type for parquet/orc writers - * @param single_write_mode Flag to indicate if there is only a single table write + * @param write_mode Flag to indicate if there is only a single table write * @param out_sink Sink for writing data * @param stream CUDA stream used for device memory operations and kernel launches * @return A tuple of the intermediate results containing the processed data @@ -2200,7 +2200,7 @@ auto convert_table_to_orc_data(table_view const& input, CompressionKind compression_kind, size_t compression_blocksize, statistics_freq stats_freq, - SingleWriteMode single_write_mode, + SingleWriteMode write_mode, data_sink const& out_sink, rmm::cuda_stream_view stream) { @@ -2254,7 +2254,7 @@ auto convert_table_to_orc_data(table_view const& input, decimal_column_sizes(dec_chunk_sizes.rg_sizes), enable_dictionary, compression_kind, - single_write_mode); + write_mode); auto enc_data = encode_columns(orc_table, std::move(dictionaries), std::move(dec_chunk_sizes), diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 3d4e8ab7bf3..f4fb8207671 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -209,7 +209,7 @@ struct persisted_statistics { } void persist(int num_table_rows, - SingleWriteMode single_write_mode, + SingleWriteMode write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index b80016ebc48..8b8c2be346f 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -506,7 +506,7 @@ struct leaf_schema_fn { inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, - SingleWriteMode single_write_mode) + SingleWriteMode write_mode) { if (col_meta.is_nullability_defined()) { CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), @@ -516,7 +516,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, } // For chunked write, when not provided nullability, we assume the worst case scenario // that all columns are nullable. - return single_write_mode == SingleWriteMode::NO or col->nullable(); + return write_mode == SingleWriteMode::NO or col->nullable(); } /** @@ -528,7 +528,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, std::vector construct_schema_tree( cudf::detail::LinkedColVector const& linked_columns, table_input_metadata& metadata, - SingleWriteMode single_write_mode, + SingleWriteMode write_mode, bool int96_timestamps) { std::vector schema; @@ -542,7 +542,7 @@ std::vector construct_schema_tree( std::function add_schema = [&](cudf::detail::LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { - bool col_nullable = is_col_nullable(col, col_meta, single_write_mode); + bool col_nullable = is_col_nullable(col, col_meta, write_mode); auto set_field_id = [&schema, parent_idx](schema_tree_node& s, column_in_metadata const& col_meta) { @@ -679,7 +679,7 @@ std::vector construct_schema_tree( right_child_meta.set_name("value"); // check the repetition type of key is required i.e. the col should be non-nullable auto key_col = col->children[lists_column_view::child_column_index]->children[0]; - CUDF_EXPECTS(!is_col_nullable(key_col, left_child_meta, single_write_mode), + CUDF_EXPECTS(!is_col_nullable(key_col, left_child_meta, write_mode), "key column cannot be nullable. For chunked writing, explicitly set the " "nullability to false in metadata"); // process key From 4d110d17838516af98665ae0e0b1397f229d5f69 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 11:16:16 -0700 Subject: [PATCH 11/18] Rename `SingleWriteMode` into `single_write_mode` --- cpp/include/cudf/io/detail/orc.hpp | 4 ++-- cpp/include/cudf/io/detail/parquet.hpp | 4 ++-- cpp/include/cudf/io/detail/utils.hpp | 2 +- cpp/src/io/functions.cpp | 8 ++++---- cpp/src/io/orc/writer_impl.cu | 18 +++++++++--------- cpp/src/io/orc/writer_impl.hpp | 12 ++++++------ cpp/src/io/parquet/writer_impl.cu | 14 +++++++------- cpp/src/io/parquet/writer_impl.hpp | 10 +++++----- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 1a53690e317..b7794c0df6a 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -96,7 +96,7 @@ class writer { */ explicit writer(std::unique_ptr sink, orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -109,7 +109,7 @@ class writer { */ explicit writer(std::unique_ptr sink, chunked_orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 304d42a29c9..9a94924824d 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -156,7 +156,7 @@ class writer { */ explicit writer(std::vector> sinks, parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -171,7 +171,7 @@ class writer { */ explicit writer(std::vector> sinks, chunked_parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** diff --git a/cpp/include/cudf/io/detail/utils.hpp b/cpp/include/cudf/io/detail/utils.hpp index adb7078d96d..e84da92da6c 100644 --- a/cpp/include/cudf/io/detail/utils.hpp +++ b/cpp/include/cudf/io/detail/utils.hpp @@ -22,7 +22,7 @@ namespace detail { /** * @brief Whether writer writes in chunks or all at once */ -enum class SingleWriteMode : bool { YES, NO }; +enum class single_write_mode : bool { YES, NO }; } // namespace detail } // namespace io } // namespace cudf diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 76c50d548f9..ddbbce53bab 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -428,7 +428,7 @@ void write_orc(orc_writer_options const& options) CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); auto writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream()); + std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); writer->write(options.get_table()); } @@ -444,7 +444,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream()); + std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); } /** @@ -519,7 +519,7 @@ std::unique_ptr> write_parquet(parquet_writer_options const auto sinks = make_datasinks(options.get_sink()); auto writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream()); + std::move(sinks), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); writer->write(options.get_table(), options.get_partitions()); @@ -575,7 +575,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co auto sinks = make_datasinks(options.get_sink()); writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream()); + std::move(sinks), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); } /** diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 525bc5701ca..53aa8d2400f 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -375,11 +375,11 @@ __global__ void copy_string_data(char* string_pool, } // namespace void persisted_statistics::persist(int num_table_rows, - SingleWriteMode write_mode, + single_write_mode write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream) { - if (write_mode == SingleWriteMode::NO) { + if (write_mode == single_write_mode::NO) { // persist the strings in the chunks into a string pool and update pointers auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); // min offset and max offset + 1 for total size @@ -666,7 +666,7 @@ orc_streams create_streams(host_span columns, std::map const& decimal_column_sizes, bool enable_dictionary, CompressionKind compression_kind, - SingleWriteMode write_mode) + single_write_mode write_mode) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? @@ -681,7 +681,7 @@ orc_streams create_streams(host_span columns, for (auto& column : columns) { auto const is_nullable = [&]() -> bool { - if (write_mode == SingleWriteMode::YES) { + if (write_mode == single_write_mode::YES) { return column.nullable(); } else { // For chunked write, when not provided nullability, we assume the worst case scenario @@ -2200,7 +2200,7 @@ auto convert_table_to_orc_data(table_view const& input, CompressionKind compression_kind, size_t compression_blocksize, statistics_freq stats_freq, - SingleWriteMode write_mode, + single_write_mode write_mode, data_sink const& out_sink, rmm::cuda_stream_view stream) { @@ -2367,7 +2367,7 @@ auto convert_table_to_orc_data(table_view const& input, writer::impl::impl(std::unique_ptr sink, orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _stream(stream), _max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, @@ -2387,7 +2387,7 @@ writer::impl::impl(std::unique_ptr sink, writer::impl::impl(std::unique_ptr sink, chunked_orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _stream(stream), _max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, @@ -2688,7 +2688,7 @@ void writer::impl::close() // Forward to implementation writer::writer(std::unique_ptr sink, orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sink), options, mode, stream)) { @@ -2697,7 +2697,7 @@ writer::writer(std::unique_ptr sink, // Forward to implementation writer::writer(std::unique_ptr sink, chunked_orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sink), options, mode, stream)) { diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f4fb8207671..472b1294992 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -209,7 +209,7 @@ struct persisted_statistics { } void persist(int num_table_rows, - SingleWriteMode write_mode, + single_write_mode write_mode, intermediate_statistics& intermediate_stats, rmm::cuda_stream_view stream); @@ -248,7 +248,7 @@ class writer::impl { */ explicit impl(std::unique_ptr sink, orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -261,7 +261,7 @@ class writer::impl { */ explicit impl(std::unique_ptr sink, chunked_orc_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -334,9 +334,9 @@ class writer::impl { CompressionKind const _compression_kind; size_t const _compression_blocksize; statistics_freq const _stats_freq; - SingleWriteMode const _single_write_mode; // Special parameter only used by `write()` to indicate - // that we are guaranteeing a single table write. This - // enables some internal optimizations. + single_write_mode const _single_write_mode; // Special parameter only used by `write()` to + // indicate that we are guaranteeing a single table + // write. This enables some internal optimizations. std::map const _kv_meta; // Optional user metadata. std::unique_ptr const _out_sink; diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 8b8c2be346f..48108ca2669 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -506,7 +506,7 @@ struct leaf_schema_fn { inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, - SingleWriteMode write_mode) + single_write_mode write_mode) { if (col_meta.is_nullability_defined()) { CUDF_EXPECTS(col_meta.nullable() || !col->nullable(), @@ -516,7 +516,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, } // For chunked write, when not provided nullability, we assume the worst case scenario // that all columns are nullable. - return write_mode == SingleWriteMode::NO or col->nullable(); + return write_mode == single_write_mode::NO or col->nullable(); } /** @@ -528,7 +528,7 @@ inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, std::vector construct_schema_tree( cudf::detail::LinkedColVector const& linked_columns, table_input_metadata& metadata, - SingleWriteMode write_mode, + single_write_mode write_mode, bool int96_timestamps) { std::vector schema; @@ -1315,7 +1315,7 @@ size_t writer::impl::column_index_buffer_size(gpu::EncColumnChunk* ck) const writer::impl::impl(std::vector> sinks, parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _stream(stream), _compression(to_parquet_compression(options.get_compression())), @@ -1341,7 +1341,7 @@ writer::impl::impl(std::vector> sinks, writer::impl::impl(std::vector> sinks, chunked_parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _stream(stream), _compression(to_parquet_compression(options.get_compression())), @@ -2042,7 +2042,7 @@ std::unique_ptr> writer::impl::close( // Forward to implementation writer::writer(std::vector> sinks, parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sinks), options, mode, stream)) { @@ -2050,7 +2050,7 @@ writer::writer(std::vector> sinks, writer::writer(std::vector> sinks, chunked_parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream) : _impl(std::make_unique(std::move(sinks), options, mode, stream)) { diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index d4fdb9a8813..c88287994a1 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -68,7 +68,7 @@ class writer::impl { */ explicit impl(std::vector> sinks, parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -81,7 +81,7 @@ class writer::impl { */ explicit impl(std::vector> sinks, chunked_parquet_writer_options const& options, - SingleWriteMode mode, + single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -229,9 +229,9 @@ class writer::impl { bool const _int96_timestamps; int32_t const _column_index_truncate_length; std::vector> const _kv_meta; // Optional user metadata. - SingleWriteMode const _single_write_mode; // Special parameter only used by `write()` to indicate - // that we are guaranteeing a single table write. - // This enables some internal optimizations. + single_write_mode const _single_write_mode; // Special parameter only used by `write()` to + // indicate that we are guaranteeing a single table + // write. This enables some internal optimizations. std::vector> const _out_sink; // Internal states, filled during `write()` and written to sink during `write` and `close()`. From 21c3139d1b8332e0f4fa709895cfa4738700d2b8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 11:25:01 -0700 Subject: [PATCH 12/18] Fix copyright year --- cpp/include/cudf/io/detail/utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/detail/utils.hpp b/cpp/include/cudf/io/detail/utils.hpp index e84da92da6c..7bbda21858d 100644 --- a/cpp/include/cudf/io/detail/utils.hpp +++ b/cpp/include/cudf/io/detail/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 2b01e442b839d09b06bf8bf508257cc5dbc3b321 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 11:46:52 -0700 Subject: [PATCH 13/18] Switch to use `host_span` --- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/orc/writer_impl.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 53aa8d2400f..8f7a28b2d8c 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2476,7 +2476,7 @@ void writer::impl::write_orc_data_to_sink(orc_streams& streams, hostdevice_2dvector const& strm_descs, encoded_data const& enc_data, file_segmentation const& segmentation, - std::vector& stripes, + host_span stripes, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 472b1294992..a063506f55a 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -309,7 +309,7 @@ class writer::impl { hostdevice_2dvector const& strm_descs, encoded_data const& enc_data, file_segmentation const& segmentation, - std::vector& stripes, + host_span stripes, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, From 2230619075158a234e130ab57fb219ee6fe06b0f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 11:50:57 -0700 Subject: [PATCH 14/18] Reorganize parameters --- cpp/src/io/orc/writer_impl.cu | 12 ++++++------ cpp/src/io/orc/writer_impl.hpp | 26 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 8f7a28b2d8c..41588ec9d88 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2456,30 +2456,30 @@ void writer::impl::write(table_view const& input) }(); // Compression/encoding were all successful. Now write the intermediate results. - write_orc_data_to_sink(streams, - comp_results, + write_orc_data_to_sink(comp_results, strm_descs, enc_data, segmentation, - stripes, orc_table, compressed_data, intermediate_stats, + streams, + stripes, bounce_buffer); // Update data into the footer. This needs to be called even when num_rows==0. add_table_to_footer_data(orc_table, stripes); } -void writer::impl::write_orc_data_to_sink(orc_streams& streams, - hostdevice_vector const& comp_results, +void writer::impl::write_orc_data_to_sink(hostdevice_vector const& comp_results, hostdevice_2dvector const& strm_descs, encoded_data const& enc_data, file_segmentation const& segmentation, - host_span stripes, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, + orc_streams& streams, + host_span stripes, host_span bounce_buffer) { if (orc_table.num_rows() == 0) { return; } diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index a063506f55a..0e8c70c9351 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -293,26 +293,26 @@ class writer::impl { * The intermediate data is generated from processing (compressing/encoding) an cuDF input table * by `process_for_write` called in the `write()` function. * - * @param streams List of stream descriptors - * @param comp_results Status of data compression - * @param strm_descs List of stream descriptors - * @param enc_data ORC per-chunk streams of encoded data - * @param segmentation Description of how the ORC file is segmented into stripes and rowgroups - * @param stripes List of stripe description - * @param orc_table Non-owning view of a cuDF table that includes ORC-related information - * @param compressed_data Compressed stream data - * @param intermediate_stats Statistics data stored between calls to write - * @param bounce_buffer Temporary host output buffer + * @param[in] comp_results Status of data compression + * @param[in] strm_descs List of stream descriptors + * @param[in] enc_data ORC per-chunk streams of encoded data + * @param[in] segmentation Description of how the ORC file is segmented into stripes and rowgroups + * @param[in] orc_table Non-owning view of a cuDF table that includes ORC-related information + * @param[in] compressed_data Compressed stream data + * @param[in,out] intermediate_stats Statistics data stored between calls to write + * @param[in,out] streams List of stream descriptors + * @param[in,out] stripes List of stripe description + * @param[in,out] bounce_buffer Temporary host output buffer */ - void write_orc_data_to_sink(orc_streams& streams, - hostdevice_vector const& comp_results, + void write_orc_data_to_sink(hostdevice_vector const& comp_results, hostdevice_2dvector const& strm_descs, encoded_data const& enc_data, file_segmentation const& segmentation, - host_span stripes, orc_table_view const& orc_table, rmm::device_buffer const& compressed_data, intermediate_statistics& intermediate_stats, + orc_streams& streams, + host_span stripes, host_span bounce_buffer); /** From 9170fef683ff981c3f09fdf686a8ae26db57b2d0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 12:05:22 -0700 Subject: [PATCH 15/18] More span adoption --- cpp/src/io/orc/writer_impl.cu | 32 +++++++++++++++++--------------- cpp/src/io/orc/writer_impl.hpp | 12 ++++++------ 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 41588ec9d88..70e15c96bb8 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2456,27 +2456,29 @@ void writer::impl::write(table_view const& input) }(); // Compression/encoding were all successful. Now write the intermediate results. - write_orc_data_to_sink(comp_results, - strm_descs, - enc_data, - segmentation, - orc_table, - compressed_data, - intermediate_stats, - streams, - stripes, - bounce_buffer); + write_orc_data_to_sink( + enc_data, + segmentation, + orc_table, + device_span(reinterpret_cast(compressed_data.data()), + compressed_data.size()), + comp_results, + strm_descs, + intermediate_stats, + streams, + stripes, + bounce_buffer); // Update data into the footer. This needs to be called even when num_rows==0. add_table_to_footer_data(orc_table, stripes); } -void writer::impl::write_orc_data_to_sink(hostdevice_vector const& comp_results, - hostdevice_2dvector const& strm_descs, - encoded_data const& enc_data, +void writer::impl::write_orc_data_to_sink(encoded_data const& enc_data, file_segmentation const& segmentation, orc_table_view const& orc_table, - rmm::device_buffer const& compressed_data, + device_span compressed_data, + host_span comp_results, + host_2dspan strm_descs, intermediate_statistics& intermediate_stats, orc_streams& streams, host_span stripes, @@ -2519,7 +2521,7 @@ void writer::impl::write_orc_data_to_sink(hostdevice_vector write_tasks.push_back(write_data_stream( strm_desc, enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], - static_cast(compressed_data.data()), + compressed_data.data(), bounce_buffer.data(), &stripe, &streams, diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 0e8c70c9351..5cabfd66d95 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -293,23 +293,23 @@ class writer::impl { * The intermediate data is generated from processing (compressing/encoding) an cuDF input table * by `process_for_write` called in the `write()` function. * - * @param[in] comp_results Status of data compression - * @param[in] strm_descs List of stream descriptors * @param[in] enc_data ORC per-chunk streams of encoded data * @param[in] segmentation Description of how the ORC file is segmented into stripes and rowgroups * @param[in] orc_table Non-owning view of a cuDF table that includes ORC-related information * @param[in] compressed_data Compressed stream data + * @param[in] comp_results Status of data compression + * @param[in] strm_descs List of stream descriptors * @param[in,out] intermediate_stats Statistics data stored between calls to write * @param[in,out] streams List of stream descriptors * @param[in,out] stripes List of stripe description * @param[in,out] bounce_buffer Temporary host output buffer */ - void write_orc_data_to_sink(hostdevice_vector const& comp_results, - hostdevice_2dvector const& strm_descs, - encoded_data const& enc_data, + void write_orc_data_to_sink(encoded_data const& enc_data, file_segmentation const& segmentation, orc_table_view const& orc_table, - rmm::device_buffer const& compressed_data, + device_span compressed_data, + host_span comp_results, + host_2dspan strm_descs, intermediate_statistics& intermediate_stats, orc_streams& streams, host_span stripes, From 9cc92ddef815dd68f3b1966c8e57c3469a5f359d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 13:36:03 -0700 Subject: [PATCH 16/18] Fix docs --- cpp/src/io/orc/writer_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 5cabfd66d95..2b04b418b41 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -302,7 +302,7 @@ class writer::impl { * @param[in,out] intermediate_stats Statistics data stored between calls to write * @param[in,out] streams List of stream descriptors * @param[in,out] stripes List of stripe description - * @param[in,out] bounce_buffer Temporary host output buffer + * @param[out] bounce_buffer Temporary host output buffer */ void write_orc_data_to_sink(encoded_data const& enc_data, file_segmentation const& segmentation, From 4282b9432f3b2fcf56bf8196f7593606ed04856c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 13:44:37 -0700 Subject: [PATCH 17/18] Reorder parameters again --- cpp/src/io/orc/writer_impl.cu | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 70e15c96bb8..c20a5ec96df 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2273,16 +2273,16 @@ auto convert_table_to_orc_data(table_view const& input, auto stripes = gather_stripes(num_index_streams, segmentation, &enc_data, &strm_descs, stream); if (num_rows == 0) { - return std::tuple{std::move(streams), - hostdevice_vector{}, // comp_results - std::move(strm_descs), - std::move(enc_data), + return std::tuple{std::move(enc_data), std::move(segmentation), - std::move(stripe_dict), - std::move(stripes), std::move(orc_table), - rmm::device_buffer{}, // compressed_data + rmm::device_buffer{}, // compressed_data + hostdevice_vector{}, // comp_results + std::move(strm_descs), intermediate_statistics{stream}, + std::move(streams), + std::move(stripes), + std::move(stripe_dict), cudf::detail::pinned_host_vector()}; } @@ -2350,16 +2350,16 @@ auto convert_table_to_orc_data(table_view const& input, auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); - return std::tuple{std::move(streams), - std::move(comp_results), - std::move(strm_descs), - std::move(enc_data), + return std::tuple{std::move(enc_data), std::move(segmentation), - std::move(stripe_dict), - std::move(stripes), std::move(orc_table), std::move(compressed_data), + std::move(comp_results), + std::move(strm_descs), std::move(intermediate_stats), + std::move(streams), + std::move(stripes), + std::move(stripe_dict), std::move(bounce_buffer)}; } @@ -2424,16 +2424,16 @@ void writer::impl::write(table_view const& input) // is still intact. // Note that `out_sink_` is intentionally passed by const reference to prevent accidentally // writing anything to it. - [[maybe_unused]] auto [streams, - comp_results, - strm_descs, - enc_data, + [[maybe_unused]] auto [enc_data, segmentation, - stripe_dict, /* unused, but its data will be accessed via pointer later */ - stripes, orc_table, compressed_data, + comp_results, + strm_descs, intermediate_stats, + streams, + stripes, + stripe_dict, /* unused, but its data will be accessed via pointer later */ bounce_buffer] = [&] { try { return convert_table_to_orc_data(input, From 156463c7d6a2286f2452dc84450efa316b711a46 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Apr 2023 13:55:29 -0700 Subject: [PATCH 18/18] Simplify code by changing `device_buffer` into `device_uvector` --- cpp/src/io/orc/writer_impl.cu | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index c20a5ec96df..fef1bb23733 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2276,7 +2276,7 @@ auto convert_table_to_orc_data(table_view const& input, return std::tuple{std::move(enc_data), std::move(segmentation), std::move(orc_table), - rmm::device_buffer{}, // compressed_data + rmm::device_uvector{0, stream}, // compressed_data hostdevice_vector{}, // comp_results std::move(strm_descs), intermediate_statistics{stream}, @@ -2322,7 +2322,7 @@ auto convert_table_to_orc_data(table_view const& input, }(); // Compress the data streams - rmm::device_buffer compressed_data(compressed_bfr_size, stream); + rmm::device_uvector compressed_data(compressed_bfr_size, stream); hostdevice_vector comp_results(num_compressed_blocks, stream); thrust::fill(rmm::exec_policy(stream), comp_results.d_begin(), @@ -2330,7 +2330,7 @@ auto convert_table_to_orc_data(table_view const& input, compression_result{0, compression_status::FAILURE}); if (compression_kind != NONE) { strm_descs.host_to_device(stream); - gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), + gpu::CompressOrcDataStreams(compressed_data.data(), num_compressed_blocks, compression_kind, compression_blocksize, @@ -2456,18 +2456,16 @@ void writer::impl::write(table_view const& input) }(); // Compression/encoding were all successful. Now write the intermediate results. - write_orc_data_to_sink( - enc_data, - segmentation, - orc_table, - device_span(reinterpret_cast(compressed_data.data()), - compressed_data.size()), - comp_results, - strm_descs, - intermediate_stats, - streams, - stripes, - bounce_buffer); + write_orc_data_to_sink(enc_data, + segmentation, + orc_table, + compressed_data, + comp_results, + strm_descs, + intermediate_stats, + streams, + stripes, + bounce_buffer); // Update data into the footer. This needs to be called even when num_rows==0. add_table_to_footer_data(orc_table, stripes);