From a5164da9719b9ca5f3dd76d003ee72910f889993 Mon Sep 17 00:00:00 2001 From: seidl Date: Wed, 24 May 2023 10:24:29 -0700 Subject: [PATCH 1/4] fix invalid use of std::exclusive_scan --- cpp/src/io/parquet/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 05d42cd9e2f..be95ab0c58e 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1710,10 +1710,10 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, size_type const total_frags = [&]() { if (frags_per_column.size() > 0) { std::exclusive_scan(frags_per_column.data(), - frags_per_column.data() + num_columns + 1, + frags_per_column.data() + num_columns, std::back_inserter(frag_offsets), 0); - return frag_offsets[num_columns]; + return frag_offsets[num_columns - 1] + frags_per_column[num_columns - 1]; } else { return 0; } From d8b4092d6f823144e684e45308702ee9840d0f08 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 24 May 2023 11:26:18 -0700 Subject: [PATCH 2/4] Update cpp/src/io/parquet/writer_impl.cu Co-authored-by: Nghia Truong <7416935+ttnghia@users.noreply.github.com> --- cpp/src/io/parquet/writer_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index be95ab0c58e..5e82b40989e 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1713,7 +1713,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, frags_per_column.data() + num_columns, std::back_inserter(frag_offsets), 0); - return frag_offsets[num_columns - 1] + frags_per_column[num_columns - 1]; + return frag_offsets.back() + frags_per_column.back(); } else { return 0; } From b4a3161f9f41803b1600ec51d76123d52e21be8b Mon Sep 17 00:00:00 2001 From: seidl Date: Wed, 24 May 2023 11:43:09 -0700 Subject: [PATCH 3/4] review suggestion --- cpp/src/io/parquet/writer_impl.cu | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 5e82b40989e..015f12daf01 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1706,13 +1706,11 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, // fragments with a (potentially) varying number of fragments per column. // first figure out the total number of fragments and calculate the start offset for each column - std::vector frag_offsets; + std::vector frag_offsets(num_columns, 0); size_type const total_frags = [&]() { if (frags_per_column.size() > 0) { - std::exclusive_scan(frags_per_column.data(), - frags_per_column.data() + num_columns, - std::back_inserter(frag_offsets), - 0); + std::exclusive_scan( + frags_per_column.begin(), frags_per_column.end(), frag_offsets.begin(), 0); return frag_offsets.back() + frags_per_column.back(); } else { return 0; From f6277f0cb73dc63d61ef2dedae4424bd396b9182 Mon Sep 17 00:00:00 2001 From: seidl Date: Wed, 24 May 2023 16:44:38 -0700 Subject: [PATCH 4/4] clean up total_frags calculation --- cpp/src/io/parquet/writer_impl.cu | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 015f12daf01..a4b3c15ddff 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1707,15 +1707,9 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, // first figure out the total number of fragments and calculate the start offset for each column std::vector frag_offsets(num_columns, 0); - size_type const total_frags = [&]() { - if (frags_per_column.size() > 0) { - std::exclusive_scan( - frags_per_column.begin(), frags_per_column.end(), frag_offsets.begin(), 0); - return frag_offsets.back() + frags_per_column.back(); - } else { - return 0; - } - }(); + std::exclusive_scan(frags_per_column.begin(), frags_per_column.end(), frag_offsets.begin(), 0); + size_type const total_frags = + frags_per_column.empty() ? 0 : frag_offsets.back() + frags_per_column.back(); rmm::device_uvector frag_stats(0, stream); hostdevice_vector page_fragments(total_frags, stream);