From bf55bd4e9bb742117fd6d860af431e9fc8792c3d Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 21 Feb 2023 02:53:38 +0000 Subject: [PATCH] Moving read out a layer so it is only called once so coalescing into larger reads is possible --- cpp/src/io/parquet/reader_impl_preprocess.cu | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 6577a1a3f0f..b1d013a96a3 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -685,7 +685,6 @@ void reader::impl::load_and_decompress_data(std::vector const& r auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -733,18 +732,19 @@ void reader::impl::load_and_decompress_data(std::vector const& r total_decompressed_size += col_meta.total_uncompressed_size; } } - // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks_async(_sources, - raw_page_data, - chunks, - io_chunk_idx, - chunks.size(), - column_chunk_offsets, - chunk_source_map, - _stream)); - remaining_rows -= row_group.num_rows; } + + // Read compressed chunk data to device memory + read_rowgroup_tasks.push_back(read_column_chunks_async(_sources, + raw_page_data, + chunks, + 0, + chunks.size(), + column_chunk_offsets, + chunk_source_map, + _stream)); + for (auto& task : read_rowgroup_tasks) { task.wait(); }