From d2f62a85fc63a356138e9901d15db30283ecd32f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 08:54:03 -0800 Subject: [PATCH 001/321] Implement a map from to compression info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 44ece671155..3fd252be73b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -25,12 +25,27 @@ #include #include +#include +#include + namespace cudf::io::orc::detail { /** * @brief Struct to store file-level data that remains constant for all chunks being read. */ struct file_intermediate_data { + using chunk_index = std::tuple; + using chunk_comp_info = std::tuple; + + struct index_hash { + std::size_t operator()(chunk_index const& index) const + { + return std::hash()(std::get<0>(index)) ^ std::hash()(std::get<1>(index)) ^ + std::hash()(std::get<2>(index)); + } + }; + std::unordered_map compinfo_map; + std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; From b8e1fd79860313b62f2c02ab7fbc595e812a7b29 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 10:49:28 -0800 Subject: [PATCH 002/321] Change benchmark Signed-off-by: Nghia Truong --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/io/orc/orc_reader_input.cpp | 33 ++++++++-------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 35b03fa33d0..84f583e8ed2 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -255,7 +255,7 @@ ConfigureNVBench( # ################################################################################################## # * orc reader benchmark -------------------------------------------------------------------------- -ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reader_options.cpp) +ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp) # ################################################################################################## # * csv reader benchmark -------------------------------------------------------------------------- diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index fdb7dbe59b8..fd27b56ef0e 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -88,9 +88,6 @@ void BM_orc_read_io_compression( nvbench::type_list, nvbench::enum_type>) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), - static_cast(data_type::FLOAT), - static_cast(data_type::DECIMAL), - static_cast(data_type::TIMESTAMP), static_cast(data_type::STRING), static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); @@ -116,29 +113,23 @@ void BM_orc_read_io_compression( orc_read_common(num_rows_written, source_sink, state); } -using d_type_list = nvbench::enum_type_list; +using d_type_list = nvbench:: + enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = + nvbench::enum_type_list; using compression_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_orc_read_data, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) - .set_name("orc_read_decode") - .set_type_axes_names({"data_type", "io"}) - .set_min_samples(4) - .add_int64_axis("cardinality", {0, 1000}) - .add_int64_axis("run_length", {1, 32}); +// 
NVBENCH_BENCH_TYPES(BM_orc_read_data, +// NVBENCH_TYPE_AXES(d_type_list, +// nvbench::enum_type_list)) +// .set_name("orc_read_decode") +// .set_type_axes_names({"data_type", "io"}) +// .set_min_samples(4) +// .add_int64_axis("cardinality", {0, 1000}) +// .add_int64_axis("run_length", {1, 32}); NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) .set_name("orc_read_io_compression") From f6f479c758f9d613357e8ed0ee3577e853f08de5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 10:49:39 -0800 Subject: [PATCH 003/321] Add comment Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 8cae1ff5309..02bf74e9c01 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -187,6 +187,8 @@ aggregate_orc_metadata::select_stripes( "Invalid stripe index"); stripe_infos.push_back( std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + + // TODO: check for overflow here. rows_to_read += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; } selected_stripes_mapping.push_back({static_cast(src_file_idx), stripe_infos}); From bd308e649506edc014883b27ec145f39caabf6c1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 10:49:49 -0800 Subject: [PATCH 004/321] Implementing query for stripe sizes Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 5 + cpp/src/io/orc/reader_impl_chunking.hpp | 1 + cpp/src/io/orc/reader_impl_preprocess.cu | 330 +++++++++++++++++++++++ 3 files changed, 336 insertions(+) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 6561c08f2d9..b0869125fe9 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -77,6 +77,11 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); + /** + * @brief Compute stripe sizes. + */ + void query_stripe_compression_info(); + /** * @brief Create the output table metadata from file metadata. * diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 3fd252be73b..43883b34077 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -45,6 +45,7 @@ struct file_intermediate_data { } }; std::unordered_map compinfo_map; + bool compinfo_ready{false}; std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 179afa12bd5..9af2bdb2aa8 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -646,6 +646,7 @@ void aggregate_child_meta(std::size_t level, for (size_type id = 0; id < p_col.num_children; id++) { auto const child_col_idx = index + id; + // TODO: Check for overflow here. 
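      // (Illustrative sketch only, not part of this patch: one way the TODO above
      // could be resolved, assuming the per-column counters are cudf::size_type:
      //   CUDF_EXPECTS(child_rows <= std::numeric_limits<size_type>::max() -
      //                  num_child_rows[child_col_idx],
      //                "total number of child rows exceeds column size limit");
      // placed before the accumulation would turn a silent wrap into an error.)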
num_child_rows[child_col_idx] += child_rows; num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; // start row could be different for each column when there is nesting at each stripe level @@ -697,6 +698,332 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace +void reader::impl::query_stripe_compression_info() +{ + if (_file_itm_data->compinfo_ready) { return; } + if (_selected_columns.num_levels() == 0) { return; } + + auto const rows_to_skip = _file_itm_data->rows_to_skip; + auto const rows_to_read = _file_itm_data->rows_to_read; + auto const& selected_stripes = _file_itm_data->selected_stripes; + + // If no rows or stripes to read, return empty columns + // TODO : remove? + if (rows_to_read == 0 || selected_stripes.empty()) { return; } + + // Set up table for converting timestamp columns from local to UTC time + auto const tz_table = [&, &selected_stripes = selected_stripes] { + auto const has_timestamp_column = std::any_of( + _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { + return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { + return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; + }); + }); + + return has_timestamp_column + ? cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_info[0].second->writerTimezone, _stream) + : std::make_unique(); + }(); + + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; + lvl_stripe_data.resize(_selected_columns.num_levels()); + + _out_buffers.resize(_selected_columns.num_levels()); + + // Iterates through levels of nested columns, child column will be one level down + // compared to parent column. + auto& col_meta = *_col_meta; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& columns_level = _selected_columns.levels[level]; + // Association between each ORC column and its cudf::column + col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); + std::vector nested_cols; + + // Get a list of column data types + std::vector column_types; + for (auto& col : columns_level) { + auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, + _use_np_dtypes, + _timestamp_type.id(), + to_cudf_decimal_type(_decimal128_columns, _metadata, col.id)); + CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); + if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or + col_type == type_id::DECIMAL128) { + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto const scale = + -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); + column_types.emplace_back(col_type, scale); + } else { + column_types.emplace_back(col_type); + } + + // Map each ORC column to its column + col_meta.orc_col_map[level][col.id] = column_types.size() - 1; + if (col_type == type_id::LIST or col_type == type_id::STRUCT) { + nested_cols.emplace_back(col); + } + } + + // Get the total number of stripes across all input files. 
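    // (Note, not from the original change: std::accumulate deduces its result type
    // from the init argument, so a plain `0` keeps the running sum in int; an
    // explicit std::size_t{0} init would keep the whole accumulation in std::size_t.)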
+ std::size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](std::size_t sum, auto& stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); + auto const num_columns = columns_level.size(); + cudf::detail::hostdevice_2dvector chunks( + total_num_stripes, num_columns, _stream); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + + const bool use_index = + _use_index && + // Do stripes have row group index + _metadata.is_row_grp_idx_present() && + // Only use if we don't have much work with complete columns & stripes + // TODO: Consider nrows, gpu, and tune the threshold + (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && + _metadata.get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && + // Only use if first row is aligned to a stripe boundary + // TODO: Fix logic to handle unaligned rows + (rows_to_skip == 0); + + // Logically view streams as columns + std::vector stream_info; + + null_count_prefix_sums.emplace_back(); + null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); + std::generate_n(std::back_inserter(null_count_prefix_sums.back()), + _selected_columns.levels[level].size(), + [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + total_num_stripes, _stream, rmm::mr::get_current_device_resource()); + }); + + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; + + std::size_t stripe_start_row = 0; + std::size_t num_dict_entries = 0; + std::size_t num_rowgroups = 0; + int stripe_idx = 0; + + std::vector, std::size_t>> read_tasks; + for (auto const& stripe_source_mapping : selected_stripes) { + // Iterate through the source files selected stripes + for (auto const& stripe : stripe_source_mapping.stripe_info) { + auto const stripe_info = stripe.first; + auto const stripe_footer = stripe.second; + + auto stream_count = stream_info.size(); + auto const total_data_size = gather_stream_info(stripe_idx, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + stream_info, + chunks); + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + // Buffer needs to be padded. + // Required by `copy_uncompressed_kernel`. 
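        // (Illustrative, not part of this patch: round_up_safe pads the byte count
        // to the next multiple of its second argument, e.g.
        //   round_up_safe(1001, 8) == 1008
        // so device copies may safely read whole words past the last valid byte.)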
+ stripe_data.emplace_back( + cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); + auto dst_base = static_cast(stripe_data.back().data()); + + // Coalesce consecutive streams into one read + while (not is_stripe_data_empty and stream_count < stream_info.size()) { + auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + auto const offset = stream_info[stream_count].offset; + auto len = stream_info[stream_count].length; + stream_count++; + + while (stream_count < stream_info.size() && + stream_info[stream_count].offset == offset + len) { + len += stream_info[stream_count].length; + stream_count++; + } + if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->is_device_read_preferred(len)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->device_read_async(offset, len, d_dst, _stream), + len)); + + } else { + auto const buffer = + _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( + offset, len); + CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY( + cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); + _stream.synchronize(); + } + } + + auto const num_rows_per_stripe = stripe_info->numberOfRows; + auto const rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; + if (use_index) { + stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + _metadata.get_row_index_stride(); + } + // Update chunks to reference streams pointers + for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { + auto& chunk = chunks[stripe_idx][col_idx]; + // start row, number of rows in a each stripe and total number of rows + // may change in lower levels of nesting + chunk.start_row = (level == 0) + ? stripe_start_row + : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; + chunk.num_rows = + (level == 0) ? stripe_info->numberOfRows + : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; + chunk.parent_validity_info = + (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; + chunk.parent_null_count_prefix_sums = + (level == 0) + ? nullptr + : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); + chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; + chunk.type_kind = _metadata.per_file_metadata[stripe_source_mapping.source_idx] + .ff.types[columns_level[col_idx].id] + .kind; + // num_child_rows for a struct column will be same, for other nested types it will be + // calculated. + chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; + chunk.dtype_id = column_types[col_idx].id(); + chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] + .ff.types[columns_level[col_idx].id] + .scale.value_or(0); + + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) + ? 
sizeof(size_type) + : cudf::size_of(column_types[col_idx]); + chunk.num_rowgroups = stripe_num_rowgroups; + if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _timestamp_type.id(); } + if (not is_stripe_data_empty) { + for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; + } + } + } + stripe_start_row += num_rows_per_stripe; + num_rowgroups += stripe_num_rowgroups; + + stripe_idx++; + } + } + for (auto& task : read_tasks) { + CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); + } + + if (stripe_data.empty()) { continue; } + + // Process dataset chunk pages into output columns + auto row_groups = + cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); + if (level > 0 and row_groups.size().first) { + cudf::host_span row_groups_span(row_groups.base_host_ptr(), + num_rowgroups * num_columns); + auto& rw_grp_meta = col_meta.rwgrp_meta; + + // Update start row and num rows per row group + std::transform(rw_grp_meta.begin(), + rw_grp_meta.end(), + row_groups_span.begin(), + rw_grp_meta.begin(), + [&](auto meta, auto& row_grp) { + row_grp.num_rows = meta.num_rows; + row_grp.start_row = meta.start_row; + return meta; + }); + } + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto decomp_data = decompress_stripe_data(*_metadata.per_file_metadata[0].decompressor, + stripe_data, + stream_info, + chunks, + row_groups, + total_num_stripes, + _metadata.get_row_index_stride(), + level == 0, + _stream); + stripe_data.clear(); + stripe_data.push_back(std::move(decomp_data)); + } else { + // Set decompressed data size equal to the input size. + // TODO + } + + for (std::size_t i = 0; i < column_types.size(); ++i) { + bool is_nullable = false; + for (std::size_t j = 0; j < total_num_stripes; ++j) { + if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { + is_nullable = true; + break; + } + } + auto is_list_type = (column_types[i].id() == type_id::LIST); + auto n_rows = (level == 0) ? 
rows_to_read : col_meta.num_child_rows[i]; + // For list column, offset column will be always size + 1 + if (is_list_type) n_rows++; + _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); + } + + decode_stream_data(num_dict_entries, + rows_to_skip, + _metadata.get_row_index_stride(), + level, + tz_table->view(), + chunks, + row_groups, + _out_buffers[level], + _stream, + _mr); + + if (nested_cols.size()) { + // Extract information to process nested child columns + scan_null_counts(chunks, null_count_prefix_sums[level], _stream); + + row_groups.device_to_host_sync(_stream); + aggregate_child_meta( + level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); + + // ORC stores number of elements at each row, so we need to generate offsets from that + std::vector buff_data; + std::for_each( + _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { + if (out_buffer.type.id() == type_id::LIST) { + auto data = static_cast(out_buffer.data()); + buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); + } + }); + + if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } + } + } // end loop level + + _file_itm_data->compinfo_ready = true; +} + void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) @@ -722,6 +1049,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, // If no rows or stripes to read, return empty columns if (rows_to_read == 0 || selected_stripes.empty()) { return; } + // query_stripe_compression_info(); + // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { auto const has_timestamp_column = std::any_of( @@ -996,6 +1325,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, } } + for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < total_num_stripes; ++j) { From 3ad2a6f5cecabee5e7d365002aeaf9b0acfb1d08 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 11:29:56 -0800 Subject: [PATCH 005/321] Remove redundant code Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 270 ++++++++--------------- 1 file changed, 91 insertions(+), 179 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 9af2bdb2aa8..3c392d25d30 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -64,7 +64,7 @@ struct orc_stream_info { uint64_t offset; // offset in file std::size_t dst_pos; // offset in memory relative to start of compressed stripe data std::size_t length; // length in file - uint32_t stripe_idx; // stripe index + uint32_t stripe_idx; // stripe processing index, not stripe index in source }; /** @@ -74,12 +74,45 @@ std::size_t gather_stream_info(std::size_t stripe_index, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, host_span orc2gdf, - host_span types, - bool use_index, - bool apply_struct_map, - std::size_t* num_dictionary_entries, - std::vector& stream_info, - cudf::detail::hostdevice_2dvector& chunks) + std::vector& stream_info) +{ + uint64_t src_offset = 0; + uint64_t dst_offset = 0; + + for (auto const& stream : stripefooter->streams) { + if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { + dst_offset += stream.length; + continue; + } + + auto const column_id = 
*stream.column_id; + auto col = orc2gdf[column_id]; + + if (col != -1) { + stream_info.emplace_back( + stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index); + dst_offset += stream.length; + } + src_offset += stream.length; + } + + return dst_offset; +} + +/** + * @brief Function that populates column descriptors stream/chunk + */ +std::size_t gather_stream_info_and_update_chunks( + std::size_t stripe_index, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool use_index, + bool apply_struct_map, + std::size_t* num_dictionary_entries, + std::vector& stream_info, + cudf::detail::hostdevice_2dvector& chunks) { uint64_t src_offset = 0; uint64_t dst_offset = 0; @@ -726,12 +759,9 @@ void reader::impl::query_stripe_compression_info() : std::make_unique(); }(); - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); - _out_buffers.resize(_selected_columns.num_levels()); - // Iterates through levels of nested columns, child column will be one level down // compared to parent column. auto& col_meta = *_col_meta; @@ -796,22 +826,10 @@ void reader::impl::query_stripe_compression_info() // Logically view streams as columns std::vector stream_info; - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, _stream, rmm::mr::get_current_device_resource()); - }); - // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; - std::size_t stripe_start_row = 0; - std::size_t num_dict_entries = 0; - std::size_t num_rowgroups = 0; - int stripe_idx = 0; + int stripe_idx = 0; std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -821,16 +839,8 @@ void reader::impl::query_stripe_compression_info() auto const stripe_footer = stripe.second; auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - stream_info, - chunks); + auto const total_data_size = gather_stream_info( + stripe_idx, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -872,61 +882,6 @@ void reader::impl::query_stripe_compression_info() } } - auto const num_rows_per_stripe = stripe_info->numberOfRows; - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; - if (use_index) { - stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / - _metadata.get_row_index_stride(); - } - // Update chunks to reference streams pointers - for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx][col_idx]; - // start row, number of rows in a each stripe and total number of rows - // may change in lower levels of nesting - chunk.start_row = (level == 0) - ? 
stripe_start_row - : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; - chunk.num_rows = - (level == 0) ? stripe_info->numberOfRows - : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; - chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; - chunk.parent_validity_info = - (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; - chunk.parent_null_count_prefix_sums = - (level == 0) - ? nullptr - : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); - chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; - chunk.type_kind = _metadata.per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[columns_level[col_idx].id] - .kind; - // num_child_rows for a struct column will be same, for other nested types it will be - // calculated. - chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; - chunk.dtype_id = column_types[col_idx].id(); - chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[columns_level[col_idx].id] - .scale.value_or(0); - - chunk.rowgroup_id = rowgroup_id; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? sizeof(string_index_pair) - : ((column_types[col_idx].id() == type_id::LIST) or - (column_types[col_idx].id() == type_id::STRUCT)) - ? sizeof(size_type) - : cudf::size_of(column_types[col_idx]); - chunk.num_rowgroups = stripe_num_rowgroups; - if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _timestamp_type.id(); } - if (not is_stripe_data_empty) { - for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; - } - } - } - stripe_start_row += num_rows_per_stripe; - num_rowgroups += stripe_num_rowgroups; - stripe_idx++; } } @@ -936,91 +891,48 @@ void reader::impl::query_stripe_compression_info() if (stripe_data.empty()) { continue; } - // Process dataset chunk pages into output columns - auto row_groups = - cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); - if (level > 0 and row_groups.size().first) { - cudf::host_span row_groups_span(row_groups.base_host_ptr(), - num_rowgroups * num_columns); - auto& rw_grp_meta = col_meta.rwgrp_meta; - - // Update start row and num rows per row group - std::transform(rw_grp_meta.begin(), - rw_grp_meta.end(), - row_groups_span.begin(), - rw_grp_meta.begin(), - [&](auto meta, auto& row_grp) { - row_grp.num_rows = meta.num_rows; - row_grp.start_row = meta.start_row; - return meta; - }); - } // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = decompress_stripe_data(*_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - total_num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - stripe_data.clear(); - stripe_data.push_back(std::move(decomp_data)); - } else { - // Set decompressed data size equal to the input size. 
- // TODO - } - - for (std::size_t i = 0; i < column_types.size(); ++i) { - bool is_nullable = false; - for (std::size_t j = 0; j < total_num_stripes; ++j) { - if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { - is_nullable = true; - break; - } + auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), _stream); + for (auto const& info : stream_info) { + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); } - auto is_list_type = (column_types[i].id() == type_id::LIST); - auto n_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[i]; - // For list column, offset column will be always size + 1 - if (is_list_type) n_rows++; - _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); - } - - decode_stream_data(num_dict_entries, - rows_to_skip, - _metadata.get_row_index_stride(), - level, - tz_table->view(), - chunks, - row_groups, - _out_buffers[level], - _stream, - _mr); - - if (nested_cols.size()) { - // Extract information to process nested child columns - scan_null_counts(chunks, null_count_prefix_sums[level], _stream); - - row_groups.device_to_host_sync(_stream); - aggregate_child_meta( - level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); - - // ORC stores number of elements at each row, so we need to generate offsets from that - std::vector buff_data; - std::for_each( - _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { - if (out_buffer.type.id() == type_id::LIST) { - auto data = static_cast(out_buffer.data()); - buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); - } - }); + compinfo.host_to_device_async(_stream); + + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + _stream); + compinfo.device_to_host_sync(_stream); + + // Count the exact number of compressed blocks + std::size_t num_compressed_blocks = 0; + std::size_t num_uncompressed_blocks = 0; + std::size_t total_decomp_size = 0; + for (std::size_t i = 0; i < compinfo.size(); ++i) { + num_compressed_blocks += compinfo[i].num_compressed_blocks; + num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; + total_decomp_size += compinfo[i].max_uncompressed_size; + } + CUDF_EXPECTS( + not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), + "Inconsistent info on compression blocks"); + printf("compression correct\n"); + fflush(stdout); - if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } + } else { + // Set decompressed data size equal to the input size. 
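      // (Hedged sketch of what this branch might eventually record, not implemented
      // here: with no compression a stream's "decompressed" size equals its on-disk
      // length, so the corresponding stripe/level entry in compinfo_map could simply
      // accumulate info.length per stream, with zero compressed and uncompressed
      // block counts.)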
+ // TODO } + } // end loop level + lvl_stripe_data.clear(); _file_itm_data->compinfo_ready = true; } @@ -1160,17 +1072,18 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - stream_info, - chunks); + auto stream_count = stream_info.size(); + auto const total_data_size = + gather_stream_info_and_update_chunks(stripe_idx, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + stream_info, + chunks); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -1325,7 +1238,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, } } - for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < total_num_stripes; ++j) { From 47a66a3f38f67e7a9b39ad26de6c3663b2678888 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 11:41:41 -0800 Subject: [PATCH 006/321] Change comment Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 3c392d25d30..067a32a06d7 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -922,10 +922,11 @@ void reader::impl::query_stripe_compression_info() CUDF_EXPECTS( not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), "Inconsistent info on compression blocks"); - printf("compression correct\n"); - fflush(stdout); } else { + printf("no compression \n"); + fflush(stdout); + // Set decompressed data size equal to the input size. 
// TODO } @@ -961,7 +962,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, // If no rows or stripes to read, return empty columns if (rows_to_read == 0 || selected_stripes.empty()) { return; } - // query_stripe_compression_info(); + query_stripe_compression_info(); // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { From 589a8423b79c97efa3f840210ce120ac05cc7a2b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 13:30:18 -0800 Subject: [PATCH 007/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 38 ++---------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 067a32a06d7..b7c3fb3e732 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -769,33 +769,11 @@ void reader::impl::query_stripe_compression_info() auto& columns_level = _selected_columns.levels[level]; // Association between each ORC column and its cudf::column col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); - std::vector nested_cols; - // Get a list of column data types - std::vector column_types; + size_type col_id{0}; for (auto& col : columns_level) { - auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, - _use_np_dtypes, - _timestamp_type.id(), - to_cudf_decimal_type(_decimal128_columns, _metadata, col.id)); - CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or - col_type == type_id::DECIMAL128) { - // sign of the scale is changed since cuDF follows c++ libraries like CNL - // which uses negative scaling, but liborc and other libraries - // follow positive scaling. - auto const scale = - -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types.emplace_back(col_type, scale); - } else { - column_types.emplace_back(col_type); - } - // Map each ORC column to its column - col_meta.orc_col_map[level][col.id] = column_types.size() - 1; - if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols.emplace_back(col); - } + col_meta.orc_col_map[level][col.id] = col_id++; } // Get the total number of stripes across all input files. 
@@ -811,18 +789,6 @@ void reader::impl::query_stripe_compression_info() total_num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - const bool use_index = - _use_index && - // Do stripes have row group index - _metadata.is_row_grp_idx_present() && - // Only use if we don't have much work with complete columns & stripes - // TODO: Consider nrows, gpu, and tune the threshold - (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && - // Only use if first row is aligned to a stripe boundary - // TODO: Fix logic to handle unaligned rows - (rows_to_skip == 0); - // Logically view streams as columns std::vector stream_info; From 9bc9ebfe5dea3baf4b0f8846abb33460f19959fd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 14:25:15 -0800 Subject: [PATCH 008/321] Extend index Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 32 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 43883b34077..1c020d2ed16 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -30,21 +30,31 @@ namespace cudf::io::orc::detail { +using stream_index = struct { + std::size_t stripe_idx; + std::size_t level; + std::size_t col_idx; + std::size_t stream_idx; +}; +using stream_comp_info = struct { + std::size_t num_compressed_blocks; + std::size_t num_uncompressed_blocks; + std::size_t total_decomp_size; +}; +struct stream_index_hash { + std::size_t operator()(stream_index const& index) const + { + auto const hasher = std::hash{}; + return hasher(index.stripe_idx) ^ hasher(index.level) ^ hasher(index.col_idx) ^ + hasher(index.stream_idx); + } +}; + /** * @brief Struct to store file-level data that remains constant for all chunks being read. 
*/ struct file_intermediate_data { - using chunk_index = std::tuple; - using chunk_comp_info = std::tuple; - - struct index_hash { - std::size_t operator()(chunk_index const& index) const - { - return std::hash()(std::get<0>(index)) ^ std::hash()(std::get<1>(index)) ^ - std::hash()(std::get<2>(index)); - } - }; - std::unordered_map compinfo_map; + std::unordered_map compinfo_map; bool compinfo_ready{false}; std::vector> lvl_stripe_data; From 0298430e4accd2c286daf9aa647f75b27b224d5a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 14:57:44 -0800 Subject: [PATCH 009/321] Compute stripe-level comp info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 23 +++++--- cpp/src/io/orc/reader_impl_preprocess.cu | 71 ++++++++++++++++-------- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1c020d2ed16..69fcd4d0772 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -30,23 +30,26 @@ namespace cudf::io::orc::detail { -using stream_index = struct { +struct stripe_level_index { std::size_t stripe_idx; std::size_t level; - std::size_t col_idx; - std::size_t stream_idx; }; -using stream_comp_info = struct { +struct stripe_level_comp_info { std::size_t num_compressed_blocks; std::size_t num_uncompressed_blocks; std::size_t total_decomp_size; }; -struct stream_index_hash { - std::size_t operator()(stream_index const& index) const +struct stripe_level_equal { + bool operator()(stripe_level_index const& lhs, stripe_level_index const& rhs) const + { + return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level; + } +}; +struct stripe_level_hash { + std::size_t operator()(stripe_level_index const& index) const { auto const hasher = std::hash{}; - return hasher(index.stripe_idx) ^ hasher(index.level) ^ hasher(index.col_idx) ^ - hasher(index.stream_idx); + return hasher(index.stripe_idx) ^ hasher(index.level); } }; @@ -54,7 +57,9 @@ struct stream_index_hash { * @brief Struct to store file-level data that remains constant for all chunks being read. 
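 *
 * compinfo_map caches, per (stripe, level) pair, the compression block counts and
 * the total decompressed size discovered by the sizing pass, so later reading steps
 * can reuse this information without re-parsing the compressed stream headers.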
*/ struct file_intermediate_data { - std::unordered_map compinfo_map; + std:: + unordered_map + compinfo_map; bool compinfo_ready{false}; std::vector> lvl_stripe_data; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index b7c3fb3e732..dbc482c1725 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -67,14 +67,23 @@ struct orc_stream_info { uint32_t stripe_idx; // stripe processing index, not stripe index in source }; +struct stream_comp_info { + orc_stream_info* stream_info; + gpu::CompressedStreamInfo* comp_info; +}; + /** * @brief Function that populates column descriptors stream/chunk */ -std::size_t gather_stream_info(std::size_t stripe_index, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - std::vector& stream_info) +std::size_t gather_stream_info( + std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + std::vector& stream_info, + std::unordered_map& + stream_compinfo_map) { uint64_t src_offset = 0; uint64_t dst_offset = 0; @@ -86,11 +95,13 @@ std::size_t gather_stream_info(std::size_t stripe_index, } auto const column_id = *stream.column_id; - auto col = orc2gdf[column_id]; + auto const col_order = orc2gdf[column_id]; - if (col != -1) { + if (col_order != -1) { stream_info.emplace_back( stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index); + stream_compinfo_map[stripe_level_index{stripe_index, level}] = + stream_comp_info{&stream_info.back(), nullptr}; dst_offset += stream.length; } src_offset += stream.length; @@ -762,6 +773,9 @@ void reader::impl::query_stripe_compression_info() auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); + std::unordered_map + stream_compinfo_map; + // Iterates through levels of nested columns, child column will be one level down // compared to parent column. 
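  // (Orientation example, not from the original code: for a LIST<INT32> column the
  // list head sits at level 0 and its int32 child one level down, at level 1.)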
auto& col_meta = *_col_meta; @@ -805,8 +819,13 @@ void reader::impl::query_stripe_compression_info() auto const stripe_footer = stripe.second; auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info( - stripe_idx, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); + auto const total_data_size = gather_stream_info(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + stream_info, + stream_compinfo_map); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -862,11 +881,20 @@ void reader::impl::query_stripe_compression_info() auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; cudf::detail::hostdevice_vector compinfo( 0, stream_info.size(), _stream); - for (auto const& info : stream_info) { + + for (auto& [stripe_level, stripe_level_info] : stream_compinfo_map) { + auto const& info = *(stripe_level_info.stream_info); compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[stripe_level.stripe_idx].data()) + info.dst_pos, info.length)); + stripe_level_info.comp_info = &compinfo[compinfo.size() - 1]; } + + // for (auto const& info : stream_info) { + // compinfo.push_back(gpu::CompressedStreamInfo( + // static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + // info.length)); + // } compinfo.host_to_device_async(_stream); gpu::ParseCompressedStripeData(compinfo.device_ptr(), @@ -876,18 +904,17 @@ void reader::impl::query_stripe_compression_info() _stream); compinfo.device_to_host_sync(_stream); - // Count the exact number of compressed blocks - std::size_t num_compressed_blocks = 0; - std::size_t num_uncompressed_blocks = 0; - std::size_t total_decomp_size = 0; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - num_compressed_blocks += compinfo[i].num_compressed_blocks; - num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; - total_decomp_size += compinfo[i].max_uncompressed_size; + auto& compinfo_map = _file_itm_data->compinfo_map; + for (auto& [stripe_level, stripe_level_info] : stream_compinfo_map) { + if (compinfo_map.find(stripe_level) == compinfo_map.end()) { + compinfo_map[stripe_level] = stripe_level_comp_info{0, 0}; + } + auto const& stream_compinfo = *stripe_level_info.comp_info; + compinfo_map[stripe_level].num_compressed_blocks += stream_compinfo.num_compressed_blocks; + compinfo_map[stripe_level].num_uncompressed_blocks += + stream_compinfo.num_uncompressed_blocks; + compinfo_map[stripe_level].total_decomp_size += stream_compinfo.max_uncompressed_size; } - CUDF_EXPECTS( - not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), - "Inconsistent info on compression blocks"); } else { printf("no compression \n"); From 6d5e45fc4638054ce06b27c592c00f417024d92c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 15:20:55 -0800 Subject: [PATCH 010/321] Successfully compute stripe-level comp info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 70 ++++++++++-------------- 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index dbc482c1725..8fb1e9a9031 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -57,33 +57,27 @@ struct orc_stream_info { explicit orc_stream_info(uint64_t 
offset_, std::size_t dst_pos_, uint32_t length_, - uint32_t stripe_idx_) - : offset(offset_), dst_pos(dst_pos_), length(length_), stripe_idx(stripe_idx_) + uint32_t stripe_idx_, + std::size_t level_) + : offset(offset_), dst_pos(dst_pos_), length(length_), stripe_idx(stripe_idx_), level(level_) { } uint64_t offset; // offset in file std::size_t dst_pos; // offset in memory relative to start of compressed stripe data std::size_t length; // length in file uint32_t stripe_idx; // stripe processing index, not stripe index in source -}; - -struct stream_comp_info { - orc_stream_info* stream_info; - gpu::CompressedStreamInfo* comp_info; + std::size_t level; // TODO }; /** * @brief Function that populates column descriptors stream/chunk */ -std::size_t gather_stream_info( - std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - std::vector& stream_info, - std::unordered_map& - stream_compinfo_map) +std::size_t gather_stream_info(std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + std::vector& stream_info) { uint64_t src_offset = 0; uint64_t dst_offset = 0; @@ -99,9 +93,7 @@ std::size_t gather_stream_info( if (col_order != -1) { stream_info.emplace_back( - stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index); - stream_compinfo_map[stripe_level_index{stripe_index, level}] = - stream_comp_info{&stream_info.back(), nullptr}; + stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index, level); dst_offset += stream.length; } src_offset += stream.length; @@ -115,6 +107,7 @@ std::size_t gather_stream_info( */ std::size_t gather_stream_info_and_update_chunks( std::size_t stripe_index, + std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, host_span orc2gdf, @@ -188,7 +181,7 @@ std::size_t gather_stream_info_and_update_chunks( } } stream_info.emplace_back( - stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index); + stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index, level); dst_offset += stream.length; } src_offset += stream.length; @@ -773,7 +766,10 @@ void reader::impl::query_stripe_compression_info() auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); - std::unordered_map + std::unordered_map stream_compinfo_map; // Iterates through levels of nested columns, child column will be one level down @@ -819,13 +815,8 @@ void reader::impl::query_stripe_compression_info() auto const stripe_footer = stripe.second; auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - stream_info, - stream_compinfo_map); + auto const total_data_size = gather_stream_info( + stripe_idx, level, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -882,19 +873,14 @@ void reader::impl::query_stripe_compression_info() cudf::detail::hostdevice_vector compinfo( 0, stream_info.size(), _stream); - for (auto& [stripe_level, stripe_level_info] : stream_compinfo_map) { - auto const& info = *(stripe_level_info.stream_info); + for (auto const& info : stream_info) { 
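        // Build one CompressedStreamInfo entry per stream and remember where it
        // lives, so the block counts parsed on the device can be read back after
        // the device-to-host sync below.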
compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[stripe_level.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); - stripe_level_info.comp_info = &compinfo[compinfo.size() - 1]; + stream_compinfo_map[stripe_level_index{info.stripe_idx, info.level}] = + &compinfo[compinfo.size() - 1]; } - // for (auto const& info : stream_info) { - // compinfo.push_back(gpu::CompressedStreamInfo( - // static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - // info.length)); - // } compinfo.host_to_device_async(_stream); gpu::ParseCompressedStripeData(compinfo.device_ptr(), @@ -905,15 +891,14 @@ void reader::impl::query_stripe_compression_info() compinfo.device_to_host_sync(_stream); auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stripe_level, stripe_level_info] : stream_compinfo_map) { + for (auto& [stripe_level, stream_compinfo] : stream_compinfo_map) { if (compinfo_map.find(stripe_level) == compinfo_map.end()) { compinfo_map[stripe_level] = stripe_level_comp_info{0, 0}; } - auto const& stream_compinfo = *stripe_level_info.comp_info; - compinfo_map[stripe_level].num_compressed_blocks += stream_compinfo.num_compressed_blocks; + compinfo_map[stripe_level].num_compressed_blocks += stream_compinfo->num_compressed_blocks; compinfo_map[stripe_level].num_uncompressed_blocks += - stream_compinfo.num_uncompressed_blocks; - compinfo_map[stripe_level].total_decomp_size += stream_compinfo.max_uncompressed_size; + stream_compinfo->num_uncompressed_blocks; + compinfo_map[stripe_level].total_decomp_size += stream_compinfo->max_uncompressed_size; } } else { @@ -1069,6 +1054,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto stream_count = stream_info.size(); auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, + level, stripe_info, stripe_footer, col_meta.orc_col_map[level], From a701e2988b963c79506fbc74f2363309ffc50d18 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 15:31:58 -0800 Subject: [PATCH 011/321] Reuse cached comp info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 6 +-- cpp/src/io/orc/reader_impl_preprocess.cu | 55 +++++++++++++----------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 69fcd4d0772..d336ad30bc5 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -35,9 +35,9 @@ struct stripe_level_index { std::size_t level; }; struct stripe_level_comp_info { - std::size_t num_compressed_blocks; - std::size_t num_uncompressed_blocks; - std::size_t total_decomp_size; + std::size_t num_compressed_blocks{0}; + std::size_t num_uncompressed_blocks{0}; + std::size_t total_decomp_size{0}; }; struct stripe_level_equal { bool operator()(stripe_level_index const& lhs, stripe_level_index const& rhs) const diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 8fb1e9a9031..ad8eabd2ae8 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -206,6 +206,7 @@ std::size_t gather_stream_info_and_update_chunks( */ rmm::device_buffer decompress_stripe_data( OrcDecompressor const& decompressor, + stripe_level_comp_info comp_info, host_span stripe_data, host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, @@ -215,32 +216,11 @@ rmm::device_buffer decompress_stripe_data( bool 
use_base_stride, rmm::cuda_stream_view stream) { - // Parse the columns' compressed info - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), stream); - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - } - compinfo.host_to_device_async(stream); - - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - stream); - compinfo.device_to_host_sync(stream); - // Count the exact number of compressed blocks - std::size_t num_compressed_blocks = 0; - std::size_t num_uncompressed_blocks = 0; - std::size_t total_decomp_size = 0; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - num_compressed_blocks += compinfo[i].num_compressed_blocks; - num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; - total_decomp_size += compinfo[i].max_uncompressed_size; - } + std::size_t num_compressed_blocks = comp_info.num_compressed_blocks; + std::size_t num_uncompressed_blocks = comp_info.num_uncompressed_blocks; + std::size_t total_decomp_size = comp_info.total_decomp_size; + CUDF_EXPECTS( not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), "Inconsistent info on compression blocks"); @@ -261,12 +241,25 @@ rmm::device_buffer decompress_stripe_data( inflate_res.end(), compression_result{0, compression_status::FAILURE}); + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), stream); + for (auto const& info : stream_info) { + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + } + // Parse again to populate the decompression input/output buffers std::size_t decomp_offset = 0; uint32_t max_uncomp_block_size = 0; uint32_t start_pos = 0; auto start_pos_uncomp = (uint32_t)num_compressed_blocks; for (std::size_t i = 0; i < compinfo.size(); ++i) { + // TODO: need this? + compinfo[i].num_compressed_blocks = num_compressed_blocks; + compinfo[i].num_uncompressed_blocks = num_uncompressed_blocks; + compinfo[i].max_uncompressed_size = total_decomp_size; + auto dst_base = static_cast(decomp_data.data()); compinfo[i].uncompressed_data = dst_base + decomp_offset; compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; @@ -766,6 +759,7 @@ void reader::impl::query_stripe_compression_info() auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); + // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
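  // Scratch map used only while this sizing pass populates
  // _file_itm_data->compinfo_map; it holds pointers into the per-level `compinfo`
  // vector built below.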
std::unordered_mapcompinfo_map; + for (auto const& info : stream_info) { + auto const& precomputed_info = + compinfo_map.at(stripe_level_index{info.stripe_idx, info.level}); + comp_info.num_compressed_blocks += precomputed_info.num_compressed_blocks; + comp_info.num_uncompressed_blocks += precomputed_info.num_uncompressed_blocks; + comp_info.total_decomp_size += precomputed_info.total_decomp_size; + } + auto decomp_data = decompress_stripe_data(*_metadata.per_file_metadata[0].decompressor, + comp_info, stripe_data, stream_info, chunks, From fbc976af5469e0c5171a0898018a8d71e3cb13cd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 16:09:24 -0800 Subject: [PATCH 012/321] Implement stream identification Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 25 +++--- cpp/src/io/orc/reader_impl_preprocess.cu | 107 +++++++++++++---------- 2 files changed, 74 insertions(+), 58 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index d336ad30bc5..c90f606da5a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -30,26 +30,32 @@ namespace cudf::io::orc::detail { -struct stripe_level_index { +// unify this with orc_stream_info +struct stream_id_info { std::size_t stripe_idx; std::size_t level; + uint32_t orc_col_idx; + StreamKind kind; }; struct stripe_level_comp_info { std::size_t num_compressed_blocks{0}; std::size_t num_uncompressed_blocks{0}; std::size_t total_decomp_size{0}; }; -struct stripe_level_equal { - bool operator()(stripe_level_index const& lhs, stripe_level_index const& rhs) const +struct stream_id_equal { + bool operator()(stream_id_info const& lhs, stream_id_info const& rhs) const { - return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level; + return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && + lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; } }; -struct stripe_level_hash { - std::size_t operator()(stripe_level_index const& index) const +struct stream_id_hash { + std::size_t operator()(stream_id_info const& index) const { auto const hasher = std::hash{}; - return hasher(index.stripe_idx) ^ hasher(index.level); + return hasher(index.stripe_idx) ^ hasher(index.level) ^ + hasher(static_cast(index.orc_col_idx)) ^ + hasher(static_cast(index.kind)); } }; @@ -57,9 +63,8 @@ struct stripe_level_hash { * @brief Struct to store file-level data that remains constant for all chunks being read. 
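 *
 * compinfo_map is keyed by stream identity (stripe index, nesting level, ORC column
 * index, and stream kind) and stores, for each stream, the compressed and
 * uncompressed block counts plus the total decompressed size.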
*/ struct file_intermediate_data { - std:: - unordered_map - compinfo_map; + std::unordered_map + compinfo_map; bool compinfo_ready{false}; std::vector> lvl_stripe_data; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index ad8eabd2ae8..e1702187e87 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -58,8 +58,16 @@ struct orc_stream_info { std::size_t dst_pos_, uint32_t length_, uint32_t stripe_idx_, - std::size_t level_) - : offset(offset_), dst_pos(dst_pos_), length(length_), stripe_idx(stripe_idx_), level(level_) + std::size_t level_, + uint32_t orc_col_idx_, + StreamKind kind_) + : offset(offset_), + dst_pos(dst_pos_), + length(length_), + stripe_idx(stripe_idx_), + level(level_), + orc_col_idx(orc_col_idx_), + kind(kind_) { } uint64_t offset; // offset in file @@ -67,6 +75,8 @@ struct orc_stream_info { std::size_t length; // length in file uint32_t stripe_idx; // stripe processing index, not stripe index in source std::size_t level; // TODO + uint32_t orc_col_idx; + StreamKind kind; }; /** @@ -92,8 +102,13 @@ std::size_t gather_stream_info(std::size_t stripe_index, auto const col_order = orc2gdf[column_id]; if (col_order != -1) { - stream_info.emplace_back( - stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index, level); + stream_info.emplace_back(stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stripe_index, + level, + column_id, + stream.kind); dst_offset += stream.length; } src_offset += stream.length; @@ -180,8 +195,13 @@ std::size_t gather_stream_info_and_update_chunks( } } } - stream_info.emplace_back( - stripeinfo->offset + src_offset, dst_offset, stream.length, stripe_index, level); + stream_info.emplace_back(stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stripe_index, + level, + column_id, + stream.kind); dst_offset += stream.length; } src_offset += stream.length; @@ -205,8 +225,9 @@ std::size_t gather_stream_info_and_update_chunks( * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( + std::unordered_map const& + compinfo_map, OrcDecompressor const& decompressor, - stripe_level_comp_info comp_info, host_span stripe_data, host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, @@ -217,9 +238,28 @@ rmm::device_buffer decompress_stripe_data( rmm::cuda_stream_view stream) { // Count the exact number of compressed blocks - std::size_t num_compressed_blocks = comp_info.num_compressed_blocks; - std::size_t num_uncompressed_blocks = comp_info.num_uncompressed_blocks; - std::size_t total_decomp_size = comp_info.total_decomp_size; + std::size_t num_compressed_blocks = 0; + std::size_t num_uncompressed_blocks = 0; + std::size_t total_decomp_size = 0; + + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), stream); + for (auto const& info : stream_info) { + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + + auto const& cached_comp_info = + compinfo_map.at(stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}); + auto& stream_comp_info = compinfo[compinfo.size() - 1]; + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; + stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; + stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + + num_compressed_blocks += 
cached_comp_info.num_compressed_blocks; + num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; + total_decomp_size += cached_comp_info.total_decomp_size; + } CUDF_EXPECTS( not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), @@ -241,25 +281,12 @@ rmm::device_buffer decompress_stripe_data( inflate_res.end(), compression_result{0, compression_status::FAILURE}); - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), stream); - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - } - // Parse again to populate the decompression input/output buffers std::size_t decomp_offset = 0; uint32_t max_uncomp_block_size = 0; uint32_t start_pos = 0; auto start_pos_uncomp = (uint32_t)num_compressed_blocks; for (std::size_t i = 0; i < compinfo.size(); ++i) { - // TODO: need this? - compinfo[i].num_compressed_blocks = num_compressed_blocks; - compinfo[i].num_uncompressed_blocks = num_uncompressed_blocks; - compinfo[i].max_uncompressed_size = total_decomp_size; - auto dst_base = static_cast(decomp_data.data()); compinfo[i].uncompressed_data = dst_base + decomp_offset; compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; @@ -760,10 +787,7 @@ void reader::impl::query_stripe_compression_info() lvl_stripe_data.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - std::unordered_map + std::unordered_map stream_compinfo_map; // Iterates through levels of nested columns, child column will be one level down @@ -871,7 +895,8 @@ void reader::impl::query_stripe_compression_info() compinfo.push_back(gpu::CompressedStreamInfo( static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); - stream_compinfo_map[stripe_level_index{info.stripe_idx, info.level}] = + stream_compinfo_map[stream_id_info{ + info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = &compinfo[compinfo.size() - 1]; } @@ -885,14 +910,10 @@ void reader::impl::query_stripe_compression_info() compinfo.device_to_host_sync(_stream); auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stripe_level, stream_compinfo] : stream_compinfo_map) { - if (compinfo_map.find(stripe_level) == compinfo_map.end()) { - compinfo_map[stripe_level] = stripe_level_comp_info{0, 0}; - } - compinfo_map[stripe_level].num_compressed_blocks += stream_compinfo->num_compressed_blocks; - compinfo_map[stripe_level].num_uncompressed_blocks += - stream_compinfo->num_uncompressed_blocks; - compinfo_map[stripe_level].total_decomp_size += stream_compinfo->max_uncompressed_size; + for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, + stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size}; } } else { @@ -1184,18 +1205,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - stripe_level_comp_info comp_info; - auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto const& info : stream_info) { - auto const& precomputed_info = - compinfo_map.at(stripe_level_index{info.stripe_idx, info.level}); - comp_info.num_compressed_blocks += precomputed_info.num_compressed_blocks; - comp_info.num_uncompressed_blocks += precomputed_info.num_uncompressed_blocks; - 
comp_info.total_decomp_size += precomputed_info.total_decomp_size; - } - - auto decomp_data = decompress_stripe_data(*_metadata.per_file_metadata[0].decompressor, - comp_info, + auto decomp_data = decompress_stripe_data(_file_itm_data->compinfo_map, + *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, chunks, From 2070045a73b65a1745af8413c58e4d4e35ae3533 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 17:03:46 -0800 Subject: [PATCH 013/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 100 +++++++++++++++++++---- 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index e1702187e87..bfc073abdbe 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -69,6 +69,13 @@ struct orc_stream_info { orc_col_idx(orc_col_idx_), kind(kind_) { +#if 0 + printf(" construct stripe id [%d, %d, %d, %d]\n", + (int)stripe_idx, + (int)level, + (int)orc_col_idx, + (int)kind); +#endif } uint64_t offset; // offset in file std::size_t dst_pos; // offset in memory relative to start of compressed stripe data @@ -195,6 +202,7 @@ std::size_t gather_stream_info_and_update_chunks( } } } + stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -245,6 +253,17 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo( 0, stream_info.size(), stream); for (auto const& info : stream_info) { +#if 0 + printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.stripe_idx, + (int)info.level, + (int)info.orc_col_idx, + (int)info.kind, + info.dst_pos, + info.length); + fflush(stdout); +#endif + compinfo.push_back(gpu::CompressedStreamInfo( static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); @@ -265,6 +284,46 @@ rmm::device_buffer decompress_stripe_data( not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), "Inconsistent info on compression blocks"); +#if 0 + std::size_t old_num_compressed_blocks = num_compressed_blocks; + std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks; + std::size_t old_total_decomp_size = total_decomp_size; + + num_compressed_blocks = 0; + num_uncompressed_blocks = 0; + total_decomp_size = 0; + for (std::size_t i = 0; i < compinfo.size(); ++i) { + num_compressed_blocks += compinfo[i].num_compressed_blocks; + num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; + total_decomp_size += compinfo[i].max_uncompressed_size; + + auto const& info = stream_info[i]; + printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", + (int)info.stripe_idx, + (int)info.level, + (int)info.orc_col_idx, + (int)info.kind, + (size_t)compinfo[i].num_compressed_blocks, + (size_t)compinfo[i].num_uncompressed_blocks, + compinfo[i].max_uncompressed_size); + fflush(stdout); + } + + if (old_num_compressed_blocks != num_compressed_blocks || + old_num_uncompressed_blocks != num_uncompressed_blocks || + old_total_decomp_size != total_decomp_size) { + printf("invalid: %d - %d, %d - %d, %d - %d\n", + (int)old_num_compressed_blocks, + (int)num_compressed_blocks, + (int)old_num_uncompressed_blocks, + (int)num_uncompressed_blocks, + (int)old_total_decomp_size, + (int)total_decomp_size + + ); + } +#endif + // Buffer needs to be padded. // Required by `gpuDecodeOrcColumnData`. 
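  // (Sizing note: total_decomp_size is the sum of the cached per-stream
  // max_uncompressed_size values gathered by query_stripe_compression_info().)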
rmm::device_buffer decomp_data( @@ -768,21 +827,6 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - // Set up table for converting timestamp columns from local to UTC time - auto const tz_table = [&, &selected_stripes = selected_stripes] { - auto const has_timestamp_column = std::any_of( - _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { - return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { - return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; - }); - }); - - return has_timestamp_column - ? cudf::detail::make_timezone_transition_table( - {}, selected_stripes[0].stripe_info[0].second->writerTimezone, _stream) - : std::make_unique(); - }(); - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); @@ -898,6 +942,16 @@ void reader::impl::query_stripe_compression_info() stream_compinfo_map[stream_id_info{ info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = &compinfo[compinfo.size() - 1]; +#if 0 + printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.stripe_idx, + (int)info.level, + (int)info.orc_col_idx, + (int)info.kind, + info.dst_pos, + info.length); + fflush(stdout); +#endif } compinfo.host_to_device_async(_stream); @@ -914,8 +968,22 @@ void reader::impl::query_stripe_compression_info() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; +#if 0 + printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", + (int)stream_id.stripe_idx, + (int)stream_id.level, + (int)stream_id.orc_col_idx, + (int)stream_id.kind, + (size_t)stream_compinfo->num_compressed_blocks, + (size_t)stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size); + fflush(stdout); +#endif } + // Must clear so we will not overwrite the old compression info stream_id. 
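+  // (stream_compinfo_map holds raw pointers into the per-level `compinfo`
+  // vector, so entries left behind would dangle once the next level rebuilds
+  // that vector.)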
+ stream_compinfo_map.clear(); + } else { printf("no compression \n"); fflush(stdout); @@ -924,6 +992,8 @@ void reader::impl::query_stripe_compression_info() // TODO } + printf(" end level %d\n\n", (int)level); + } // end loop level lvl_stripe_data.clear(); From 84813f4148aa0243557e6c97aeb469b4eae49796 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 17:08:05 -0800 Subject: [PATCH 014/321] Remove comment Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index bfc073abdbe..56c518a9d6a 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -270,6 +270,8 @@ rmm::device_buffer decompress_stripe_data( auto const& cached_comp_info = compinfo_map.at(stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}); + // auto const& cached_comp_info = + // compinfo_map[stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}]; auto& stream_comp_info = compinfo[compinfo.size() - 1]; stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; @@ -992,7 +994,7 @@ void reader::impl::query_stripe_compression_info() // TODO } - printf(" end level %d\n\n", (int)level); + // printf(" end level %d\n\n", (int)level); } // end loop level From cd4f719fa32072c558508113a052d1ef35a1d90e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 17:29:09 -0800 Subject: [PATCH 015/321] Disable comment Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 56c518a9d6a..e7002a00579 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -987,8 +987,8 @@ void reader::impl::query_stripe_compression_info() stream_compinfo_map.clear(); } else { - printf("no compression \n"); - fflush(stdout); + // printf("no compression \n"); + // fflush(stdout); // Set decompressed data size equal to the input size. 
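      // A possible sketch for this TODO, assuming the uncompressed case can
      // reuse stripe_level_comp_info with zero block counts and the raw stream
      // length standing in for the decompressed size:
      //
      //   for (auto const& info : stream_info) {
      //     compinfo_map[stream_id_info{info.stripe_idx, info.level,
      //                                 info.orc_col_idx, info.kind}] =
      //       stripe_level_comp_info{0, 0, info.length};
      //   }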
// TODO From abe91180e427c5d575282c9a2c2c4425801ab5d5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 17:49:41 -0800 Subject: [PATCH 016/321] Do not read data again Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 72 +++++++++++------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index e7002a00579..5f74daa8ac2 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -998,7 +998,7 @@ void reader::impl::query_stripe_compression_info() } // end loop level - lvl_stripe_data.clear(); + // lvl_stripe_data.clear(); _file_itm_data->compinfo_ready = true; } @@ -1131,7 +1131,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, std::size_t num_rowgroups = 0; int stripe_idx = 0; - std::vector, std::size_t>> read_tasks; + // std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { // Iterate through the source files selected stripes for (auto const& stripe : stripe_source_mapping.stripe_info) { @@ -1156,41 +1156,37 @@ void reader::impl::prepare_data(uint64_t skip_rows, CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. - stripe_data.emplace_back( - cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); - auto dst_base = static_cast(stripe_data.back().data()); + auto dst_base = static_cast(stripe_data[stripe_idx].data()); // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; - stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; - stream_count++; - } - if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, _stream), - len)); - - } else { - auto const buffer = - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); - } - } + // while (not is_stripe_data_empty and stream_count < stream_info.size()) { + // auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + // auto const offset = stream_info[stream_count].offset; + // auto len = stream_info[stream_count].length; + // stream_count++; + + // while (stream_count < stream_info.size() && + // stream_info[stream_count].offset == offset + len) { + // len += stream_info[stream_count].length; + // stream_count++; + // } + // if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] + // .source->is_device_read_preferred(len)) { + // read_tasks.push_back( + // std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] + // .source->device_read_async(offset, len, d_dst, _stream), + // len)); + + // } else { + // auto const buffer = + // 
_metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( + // offset, len); + // CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + // CUDF_CUDA_TRY( + // cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); + // _stream.synchronize(); + // } + // } auto const num_rows_per_stripe = stripe_info->numberOfRows; auto const rowgroup_id = num_rowgroups; @@ -1250,9 +1246,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, stripe_idx++; } } - for (auto& task : read_tasks) { - CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); - } + // for (auto& task : read_tasks) { + // CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); + // } if (stripe_data.empty()) { continue; } From c1cdf26b78589ee1f8a98bc2c2872a5155abf326 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 18:06:44 -0800 Subject: [PATCH 017/321] Fix header year Signed-off-by: Nghia Truong --- cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index fd27b56ef0e..8254bf65fe2 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From aaf2c31d468db1144463938bc42b0a0c729df03a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 20:45:02 -0800 Subject: [PATCH 018/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 46 ++++-------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 5f74daa8ac2..ea636fc4ddc 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -836,6 +836,10 @@ void reader::impl::query_stripe_compression_info() std::unordered_map stream_compinfo_map; + // Logically view streams as columns + std::vector stream_info; + stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + // Iterates through levels of nested columns, child column will be one level down // compared to parent column. auto& col_meta = *_col_meta; @@ -849,7 +853,9 @@ void reader::impl::query_stripe_compression_info() // Map each ORC column to its column col_meta.orc_col_map[level][col.id] = col_id++; } + } + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { // Get the total number of stripes across all input files. 
std::size_t total_num_stripes = std::accumulate(selected_stripes.begin(), @@ -858,13 +864,6 @@ void reader::impl::query_stripe_compression_info() [](std::size_t sum, auto& stripe_source_mapping) { return sum + stripe_source_mapping.stripe_info.size(); }); - auto const num_columns = columns_level.size(); - cudf::detail::hostdevice_2dvector chunks( - total_num_stripes, num_columns, _stream); - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - - // Logically view streams as columns - std::vector stream_info; // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; @@ -984,6 +983,7 @@ void reader::impl::query_stripe_compression_info() } // Must clear so we will not overwrite the old compression info stream_id. + stream_info.clear(); stream_compinfo_map.clear(); } else { @@ -1056,7 +1056,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& columns_level = _selected_columns.levels[level]; // Association between each ORC column and its cudf::column - col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); std::vector nested_cols; // Get a list of column data types @@ -1080,7 +1079,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Map each ORC column to its column - col_meta.orc_col_map[level][col.id] = column_types.size() - 1; if (col_type == type_id::LIST or col_type == type_id::STRUCT) { nested_cols.emplace_back(col); } @@ -1158,36 +1156,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto dst_base = static_cast(stripe_data[stripe_idx].data()); - // Coalesce consecutive streams into one read - // while (not is_stripe_data_empty and stream_count < stream_info.size()) { - // auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - // auto const offset = stream_info[stream_count].offset; - // auto len = stream_info[stream_count].length; - // stream_count++; - - // while (stream_count < stream_info.size() && - // stream_info[stream_count].offset == offset + len) { - // len += stream_info[stream_count].length; - // stream_count++; - // } - // if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - // .source->is_device_read_preferred(len)) { - // read_tasks.push_back( - // std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - // .source->device_read_async(offset, len, d_dst, _stream), - // len)); - - // } else { - // auto const buffer = - // _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - // offset, len); - // CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - // CUDF_CUDA_TRY( - // cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - // _stream.synchronize(); - // } - // } - auto const num_rows_per_stripe = stripe_info->numberOfRows; auto const rowgroup_id = num_rowgroups; auto stripe_num_rowgroups = 0; From 3da6ca794cd0e33a4e476e92210e13be1609570b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 22:28:19 -0800 Subject: [PATCH 019/321] Trying to read stripe-by-stripe Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 3 +- cpp/src/io/orc/reader_impl_preprocess.cu | 285 ++++++++++++----------- 2 files changed, 150 insertions(+), 138 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index c90f606da5a..4bafc38dbdd 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ 
b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -67,7 +67,8 @@ struct file_intermediate_data { compinfo_map; bool compinfo_ready{false}; - std::vector> lvl_stripe_data; + // Tracker for eventually deallocating compressed and uncompressed data + std::vector stripe_data; std::vector>> null_count_prefix_sums; int64_t rows_to_skip; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index ea636fc4ddc..7e8feb4ef49 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -108,6 +108,8 @@ std::size_t gather_stream_info(std::size_t stripe_index, auto const column_id = *stream.column_id; auto const col_order = orc2gdf[column_id]; + // TODO + // optimize this. if (col_order != -1) { stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, @@ -829,8 +831,8 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - lvl_stripe_data.resize(_selected_columns.num_levels()); + auto& stripe_data = _file_itm_data->stripe_data; + // lvl_stripe_data.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -855,94 +857,94 @@ void reader::impl::query_stripe_compression_info() } } - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); + // Get the total number of stripes across all input files. + std::size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](std::size_t sum, auto& stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); + stripe_data.reserve(total_num_stripes); - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; + int stripe_idx = 0; - int stripe_idx = 0; + for (auto const& stripe_source_mapping : selected_stripes) { + // Iterate through the source files selected stripes + for (auto const& stripe : stripe_source_mapping.stripe_info) { + auto const stripe_info = stripe.first; + auto const stripe_footer = stripe.second; - std::vector, std::size_t>> read_tasks; - for (auto const& stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (auto const& stripe : stripe_source_mapping.stripe_info) { - auto const stripe_info = stripe.first; - auto const stripe_footer = stripe.second; + auto stream_count = stream_info.size(); - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info( + std::vector, std::size_t>> read_tasks; + std::size_t total_data_size{0}; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + total_data_size += gather_stream_info( stripe_idx, level, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. 
- stripe_data.emplace_back( - cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); - auto dst_base = static_cast(stripe_data.back().data()); - - // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; + } // end loop level + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + // Buffer needs to be padded. + // Required by `copy_uncompressed_kernel`. + stripe_data.emplace_back(cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), + _stream); + auto dst_base = static_cast(stripe_data.back().data()); + + // Coalesce consecutive streams into one read + while (not is_stripe_data_empty and stream_count < stream_info.size()) { + auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + auto const offset = stream_info[stream_count].offset; + auto len = stream_info[stream_count].length; + stream_count++; + + while (stream_count < stream_info.size() && + stream_info[stream_count].offset == offset + len) { + len += stream_info[stream_count].length; stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; - stream_count++; - } - if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, _stream), - len)); - - } else { - auto const buffer = - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); - } } + if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->is_device_read_preferred(len)) { + read_tasks.push_back(std::pair( + _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->device_read_async( + offset, len, d_dst, _stream), + len)); - stripe_idx++; + } else { + auto const buffer = + _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read(offset, + len); + CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY( + cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); + _stream.synchronize(); + } } + + stripe_idx++; } - for (auto& task : read_tasks) { - CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); - } + } + for (auto& task : read_tasks) { + CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); + } - if (stripe_data.empty()) { continue; } + if (stripe_data.empty()) { continue; } - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), _stream); - - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - 
static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = - &compinfo[compinfo.size() - 1]; + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), _stream); + + for (auto const& info : stream_info) { + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + stream_compinfo_map[stream_id_info{ + info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = &compinfo[compinfo.size() - 1]; #if 0 printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.stripe_idx, @@ -953,22 +955,22 @@ void reader::impl::query_stripe_compression_info() info.length); fflush(stdout); #endif - } + } - compinfo.host_to_device_async(_stream); + compinfo.host_to_device_async(_stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - _stream); - compinfo.device_to_host_sync(_stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + _stream); + compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; + auto& compinfo_map = _file_itm_data->compinfo_map; + for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, + stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size}; #if 0 printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, @@ -980,23 +982,23 @@ void reader::impl::query_stripe_compression_info() stream_compinfo->max_uncompressed_size); fflush(stdout); #endif - } + } - // Must clear so we will not overwrite the old compression info stream_id. - stream_info.clear(); - stream_compinfo_map.clear(); + // Must clear so we will not overwrite the old compression info stream_id. + stream_info.clear(); + stream_compinfo_map.clear(); - } else { - // printf("no compression \n"); - // fflush(stdout); + } else { + // printf("no compression \n"); + // fflush(stdout); - // Set decompressed data size equal to the input size. - // TODO - } + // Set decompressed data size equal to the input size. + // TODO + } - // printf(" end level %d\n\n", (int)level); + // printf(" end level %d\n\n", (int)level); - } // end loop level + // } // end loop level // lvl_stripe_data.clear(); _file_itm_data->compinfo_ready = true; @@ -1044,22 +1046,37 @@ void reader::impl::prepare_data(uint64_t skip_rows, : std::make_unique(); }(); - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& stripe_data = _file_itm_data->stripe_data; auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; - lvl_stripe_data.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); // Iterates through levels of nested columns, child column will be one level down // compared to parent column. 
auto& col_meta = *_col_meta; + + // Get a list of column data types + std::vector> column_types; + column_types.resize(_selected_columns.num_levels()); + + // Association between each ORC column and its cudf::column + std::vector> nested_cols; + nested_cols.resize(_selected_columns.num_levels()); + + // Get the total number of stripes across all input files. + std::size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](std::size_t sum, auto& stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); + + std::vector> lvl_chunks; + lvl_chunks.resize(_selected_columns.num_levels()); + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& columns_level = _selected_columns.levels[level]; - // Association between each ORC column and its cudf::column - std::vector nested_cols; - - // Get a list of column data types - std::vector column_types; for (auto& col : columns_level) { auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, _use_np_dtypes, @@ -1073,29 +1090,35 @@ void reader::impl::prepare_data(uint64_t skip_rows, // follow positive scaling. auto const scale = -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types.emplace_back(col_type, scale); + column_types[level].emplace_back(col_type, scale); } else { - column_types.emplace_back(col_type); + column_types[level].emplace_back(col_type); } // Map each ORC column to its column if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols.emplace_back(col); + nested_cols[level].emplace_back(col); } } - // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); + null_count_prefix_sums.emplace_back(); + null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); + std::generate_n(std::back_inserter(null_count_prefix_sums.back()), + _selected_columns.levels[level].size(), + [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + total_num_stripes, _stream, rmm::mr::get_current_device_resource()); + }); + + auto const num_columns = columns_level.size(); + lvl_chunks[level] = + cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); + memset(lvl_chunks[level].base_host_ptr(), 0, lvl_chunks[level].size_bytes()); + } + + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& columns_level = _selected_columns.levels[level]; auto const num_columns = columns_level.size(); - cudf::detail::hostdevice_2dvector chunks( - total_num_stripes, num_columns, _stream); - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = _use_index && @@ -1112,31 +1135,19 @@ void reader::impl::prepare_data(uint64_t skip_rows, // Logically view streams as columns std::vector stream_info; - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, _stream, rmm::mr::get_current_device_resource()); - }); - - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; - std::size_t 
stripe_start_row = 0; std::size_t num_dict_entries = 0; std::size_t num_rowgroups = 0; int stripe_idx = 0; // std::vector, std::size_t>> read_tasks; + auto& chunks = lvl_chunks[level]; for (auto const& stripe_source_mapping : selected_stripes) { // Iterate through the source files selected stripes for (auto const& stripe : stripe_source_mapping.stripe_info) { auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto stream_count = stream_info.size(); auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, level, From f96b513edc044013567e54621c5eed5a371aa6c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 26 Jan 2024 22:28:23 -0800 Subject: [PATCH 020/321] Revert "Trying to read stripe-by-stripe" This reverts commit 3da6ca794cd0e33a4e476e92210e13be1609570b. --- cpp/src/io/orc/reader_impl_chunking.hpp | 3 +- cpp/src/io/orc/reader_impl_preprocess.cu | 285 +++++++++++------------ 2 files changed, 138 insertions(+), 150 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 4bafc38dbdd..c90f606da5a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -67,8 +67,7 @@ struct file_intermediate_data { compinfo_map; bool compinfo_ready{false}; - // Tracker for eventually deallocating compressed and uncompressed data - std::vector stripe_data; + std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; int64_t rows_to_skip; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 7e8feb4ef49..ea636fc4ddc 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -108,8 +108,6 @@ std::size_t gather_stream_info(std::size_t stripe_index, auto const column_id = *stream.column_id; auto const col_order = orc2gdf[column_id]; - // TODO - // optimize this. if (col_order != -1) { stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, @@ -831,8 +829,8 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& stripe_data = _file_itm_data->stripe_data; - // lvl_stripe_data.resize(_selected_columns.num_levels()); + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + lvl_stripe_data.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -857,94 +855,94 @@ void reader::impl::query_stripe_compression_info() } } - // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); - stripe_data.reserve(total_num_stripes); + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + // Get the total number of stripes across all input files. 
+ std::size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](std::size_t sum, auto& stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); - int stripe_idx = 0; + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; - for (auto const& stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (auto const& stripe : stripe_source_mapping.stripe_info) { - auto const stripe_info = stripe.first; - auto const stripe_footer = stripe.second; + int stripe_idx = 0; - auto stream_count = stream_info.size(); + std::vector, std::size_t>> read_tasks; + for (auto const& stripe_source_mapping : selected_stripes) { + // Iterate through the source files selected stripes + for (auto const& stripe : stripe_source_mapping.stripe_info) { + auto const stripe_info = stripe.first; + auto const stripe_footer = stripe.second; - std::vector, std::size_t>> read_tasks; - std::size_t total_data_size{0}; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - total_data_size += gather_stream_info( + auto stream_count = stream_info.size(); + auto const total_data_size = gather_stream_info( stripe_idx, level, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); - } // end loop level - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. - stripe_data.emplace_back(cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), - _stream); - auto dst_base = static_cast(stripe_data.back().data()); - - // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; - stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + // Buffer needs to be padded. + // Required by `copy_uncompressed_kernel`. 
+ stripe_data.emplace_back( + cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); + auto dst_base = static_cast(stripe_data.back().data()); + + // Coalesce consecutive streams into one read + while (not is_stripe_data_empty and stream_count < stream_info.size()) { + auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + auto const offset = stream_info[stream_count].offset; + auto len = stream_info[stream_count].length; stream_count++; - } - if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - read_tasks.push_back(std::pair( - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->device_read_async( - offset, len, d_dst, _stream), - len)); - } else { - auto const buffer = - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read(offset, - len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); + while (stream_count < stream_info.size() && + stream_info[stream_count].offset == offset + len) { + len += stream_info[stream_count].length; + stream_count++; + } + if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->is_device_read_preferred(len)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->device_read_async(offset, len, d_dst, _stream), + len)); + + } else { + auto const buffer = + _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( + offset, len); + CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY( + cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); + _stream.synchronize(); + } } - } - stripe_idx++; + stripe_idx++; + } + } + for (auto& task : read_tasks) { + CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } - } - for (auto& task : read_tasks) { - CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); - } - - if (stripe_data.empty()) { continue; } - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), _stream); + if (stripe_data.empty()) { continue; } - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = &compinfo[compinfo.size() - 1]; + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), _stream); + + for (auto const& info : stream_info) { + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + stream_compinfo_map[stream_id_info{ + info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = + &compinfo[compinfo.size() - 1]; #if 0 printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.stripe_idx, @@ -955,22 +953,22 @@ void 
reader::impl::query_stripe_compression_info() info.length); fflush(stdout); #endif - } + } - compinfo.host_to_device_async(_stream); + compinfo.host_to_device_async(_stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - _stream); - compinfo.device_to_host_sync(_stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + _stream); + compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; + auto& compinfo_map = _file_itm_data->compinfo_map; + for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, + stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size}; #if 0 printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, @@ -982,23 +980,23 @@ void reader::impl::query_stripe_compression_info() stream_compinfo->max_uncompressed_size); fflush(stdout); #endif - } + } - // Must clear so we will not overwrite the old compression info stream_id. - stream_info.clear(); - stream_compinfo_map.clear(); + // Must clear so we will not overwrite the old compression info stream_id. + stream_info.clear(); + stream_compinfo_map.clear(); - } else { - // printf("no compression \n"); - // fflush(stdout); + } else { + // printf("no compression \n"); + // fflush(stdout); - // Set decompressed data size equal to the input size. - // TODO - } + // Set decompressed data size equal to the input size. + // TODO + } - // printf(" end level %d\n\n", (int)level); + // printf(" end level %d\n\n", (int)level); - // } // end loop level + } // end loop level // lvl_stripe_data.clear(); _file_itm_data->compinfo_ready = true; @@ -1046,37 +1044,22 @@ void reader::impl::prepare_data(uint64_t skip_rows, : std::make_unique(); }(); - auto& stripe_data = _file_itm_data->stripe_data; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; + lvl_stripe_data.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); // Iterates through levels of nested columns, child column will be one level down // compared to parent column. auto& col_meta = *_col_meta; - - // Get a list of column data types - std::vector> column_types; - column_types.resize(_selected_columns.num_levels()); - - // Association between each ORC column and its cudf::column - std::vector> nested_cols; - nested_cols.resize(_selected_columns.num_levels()); - - // Get the total number of stripes across all input files. 
- std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); - - std::vector> lvl_chunks; - lvl_chunks.resize(_selected_columns.num_levels()); - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& columns_level = _selected_columns.levels[level]; + // Association between each ORC column and its cudf::column + std::vector nested_cols; + + // Get a list of column data types + std::vector column_types; for (auto& col : columns_level) { auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, _use_np_dtypes, @@ -1090,35 +1073,29 @@ void reader::impl::prepare_data(uint64_t skip_rows, // follow positive scaling. auto const scale = -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types[level].emplace_back(col_type, scale); + column_types.emplace_back(col_type, scale); } else { - column_types[level].emplace_back(col_type); + column_types.emplace_back(col_type); } // Map each ORC column to its column if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols[level].emplace_back(col); + nested_cols.emplace_back(col); } } - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, _stream, rmm::mr::get_current_device_resource()); - }); - - auto const num_columns = columns_level.size(); - lvl_chunks[level] = - cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); - memset(lvl_chunks[level].base_host_ptr(), 0, lvl_chunks[level].size_bytes()); - } - - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& columns_level = _selected_columns.levels[level]; + // Get the total number of stripes across all input files. 
+ std::size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](std::size_t sum, auto& stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); auto const num_columns = columns_level.size(); + cudf::detail::hostdevice_2dvector chunks( + total_num_stripes, num_columns, _stream); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = _use_index && @@ -1135,19 +1112,31 @@ void reader::impl::prepare_data(uint64_t skip_rows, // Logically view streams as columns std::vector stream_info; + null_count_prefix_sums.emplace_back(); + null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); + std::generate_n(std::back_inserter(null_count_prefix_sums.back()), + _selected_columns.levels[level].size(), + [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + total_num_stripes, _stream, rmm::mr::get_current_device_resource()); + }); + + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; + std::size_t stripe_start_row = 0; std::size_t num_dict_entries = 0; std::size_t num_rowgroups = 0; int stripe_idx = 0; // std::vector, std::size_t>> read_tasks; - auto& chunks = lvl_chunks[level]; for (auto const& stripe_source_mapping : selected_stripes) { // Iterate through the source files selected stripes for (auto const& stripe : stripe_source_mapping.stripe_info) { auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; + auto stream_count = stream_info.size(); auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, level, From c0ac62cab5bc87a69777f68ea7ec6f2abd062813 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 14:25:43 -0800 Subject: [PATCH 021/321] Add test Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 16 +++++++---- cpp/tests/io/orc_test.cpp | 36 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index ea636fc4ddc..6d7d1e28b59 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#define PRINT_DEBUG + #include "reader_impl.hpp" #include "reader_impl_chunking.hpp" #include "reader_impl_helpers.hpp" @@ -69,7 +71,7 @@ struct orc_stream_info { orc_col_idx(orc_col_idx_), kind(kind_) { -#if 0 +#ifdef PRINT_DEBUG printf(" construct stripe id [%d, %d, %d, %d]\n", (int)stripe_idx, (int)level, @@ -253,7 +255,7 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo( 0, stream_info.size(), stream); for (auto const& info : stream_info) { -#if 0 +#ifdef PRINT_DEBUG printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.stripe_idx, (int)info.level, @@ -268,8 +270,12 @@ rmm::device_buffer decompress_stripe_data( static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); + printf("line %d\n", __LINE__); + fflush(stdout); auto const& cached_comp_info = compinfo_map.at(stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}); + printf("line %d\n", __LINE__); + fflush(stdout); // auto const& cached_comp_info = // compinfo_map[stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}]; auto& stream_comp_info = compinfo[compinfo.size() - 1]; @@ -286,7 +292,7 @@ rmm::device_buffer decompress_stripe_data( not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), "Inconsistent info on compression blocks"); -#if 0 +#ifdef XXX std::size_t old_num_compressed_blocks = num_compressed_blocks; std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks; std::size_t old_total_decomp_size = total_decomp_size; @@ -943,7 +949,7 @@ void reader::impl::query_stripe_compression_info() stream_compinfo_map[stream_id_info{ info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = &compinfo[compinfo.size() - 1]; -#if 0 +#ifdef PRINT_DEBUG printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.stripe_idx, (int)info.level, @@ -969,7 +975,7 @@ void reader::impl::query_stripe_compression_info() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; -#if 0 +#ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, (int)stream_id.level, diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 2ae6edc6c7d..8cdec659ce0 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1123,6 +1124,41 @@ TEST_F(OrcWriterTest, SlicedValidMask) cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } +TEST_F(OrcReaderTest, Test1) +{ + std::string filepath1 = + "/home/nghiat/Devel/cudf/1/python/cudf/cudf/tests/data/orc/" + "TestOrcFile.boolean_corruption_PR_6636.orc"; + + std::string filepath2 = + "/home/nghiat/Devel/cudf/1/python/cudf/cudf/tests/data/orc/" + "TestOrcFile.boolean_corruption_PR_6702.orc"; + + { + printf("test1\n"); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); + auto result = cudf::io::read_orc(read_opts); + for (int i = 0; i < result.tbl->num_columns(); i++) { + auto& col = result.tbl->get_column(i); + cudf::test::print(col); + printf("\n"); + } + } + + { + printf("test2\n"); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath2}}); + auto result = cudf::io::read_orc(read_opts); + for (int i = 0; i < result.tbl->num_columns(); i++) { + 
auto& col = result.tbl->get_column(i); + cudf::test::print(col); + printf("\n"); + } + } +} + TEST_F(OrcReaderTest, SingleInputs) { srand(31533); From 6049725b1418ecb8ef0807a5ba5cc3912ca3da33 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 14:49:28 -0800 Subject: [PATCH 022/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 36 ++++++++++++++++++++---- cpp/tests/io/orc_test.cpp | 2 ++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 6d7d1e28b59..2a0e2aef57b 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#define PRINT_DEBUG +// #define PRINT_DEBUG #include "reader_impl.hpp" #include "reader_impl_chunking.hpp" @@ -96,6 +96,8 @@ std::size_t gather_stream_info(std::size_t stripe_index, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, host_span orc2gdf, + host_span types, + bool apply_struct_map, std::vector& stream_info) { uint64_t src_offset = 0; @@ -108,9 +110,25 @@ std::size_t gather_stream_info(std::size_t stripe_index, } auto const column_id = *stream.column_id; - auto const col_order = orc2gdf[column_id]; + auto col = orc2gdf[column_id]; + printf("first construct col id = %d, order = %d\n", (int)column_id, (int)col); + + if (col == -1 and apply_struct_map) { + // A struct-type column has no data itself, but rather child columns + // for each of its fields. There is only a PRESENT stream, which + // needs to be included for the reader. + auto const schema_type = types[column_id]; + if (not schema_type.subtypes.empty()) { + if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { + for (auto const& idx : schema_type.subtypes) { + auto child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; + if (child_idx >= 0) { col = child_idx; } + } + } + } + } - if (col_order != -1) { + if (col != -1) { stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -167,6 +185,7 @@ std::size_t gather_stream_info_and_update_chunks( auto const column_id = *stream.column_id; auto col = orc2gdf[column_id]; + printf("construct col id = %d, order = %d\n", (int)column_id, (int)col); if (col == -1 and apply_struct_map) { // A struct-type column has no data itself, but rather child columns @@ -205,6 +224,7 @@ std::size_t gather_stream_info_and_update_chunks( } } + printf("before construct col id = %d, order = %d\n", (int)column_id, (int)col); stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -884,8 +904,14 @@ void reader::impl::query_stripe_compression_info() auto const stripe_footer = stripe.second; auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info( - stripe_idx, level, stripe_info, stripe_footer, col_meta.orc_col_map[level], stream_info); + auto const total_data_size = gather_stream_info(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + level == 0, + stream_info); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 8cdec659ce0..5ff27bd9e10 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1124,6 +1124,7 @@ TEST_F(OrcWriterTest, SlicedValidMask) cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } +#if 0 TEST_F(OrcReaderTest, Test1) { std::string filepath1 = @@ -1159,6 +1160,7 @@ TEST_F(OrcReaderTest, Test1) } } +#endif TEST_F(OrcReaderTest, SingleInputs) { srand(31533); From 1fa634b089121ae071bd39b3639785c0f794e564 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 14:52:08 -0800 Subject: [PATCH 023/321] Remove debug info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_preprocess.cu | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 2a0e2aef57b..4f9dac15d98 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -111,7 +111,6 @@ std::size_t gather_stream_info(std::size_t stripe_index, auto const column_id = *stream.column_id; auto col = orc2gdf[column_id]; - printf("first construct col id = %d, order = %d\n", (int)column_id, (int)col); if (col == -1 and apply_struct_map) { // A struct-type column has no data itself, but rather child columns @@ -185,7 +184,6 @@ std::size_t gather_stream_info_and_update_chunks( auto const column_id = *stream.column_id; auto col = orc2gdf[column_id]; - printf("construct col id = %d, order = %d\n", (int)column_id, (int)col); if (col == -1 and apply_struct_map) { // A struct-type column has no data itself, but rather child columns @@ -224,7 +222,6 @@ std::size_t gather_stream_info_and_update_chunks( } } - printf("before construct col id = %d, order = %d\n", (int)column_id, (int)col); stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -290,12 +287,12 @@ rmm::device_buffer decompress_stripe_data( static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // 
fflush(stdout); auto const& cached_comp_info = compinfo_map.at(stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}); - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); // auto const& cached_comp_info = // compinfo_map[stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}]; auto& stream_comp_info = compinfo[compinfo.size() - 1]; From 57bd6d512238f46229b09c424b6bc31afe2e5150 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 15:04:52 -0800 Subject: [PATCH 024/321] Separate implementation Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 1 + cpp/src/io/orc/reader_impl_chunking.cu | 304 +++++++++++++++++++++++ cpp/src/io/orc/reader_impl_chunking.hpp | 36 +++ cpp/src/io/orc/reader_impl_preprocess.cu | 283 --------------------- 4 files changed, 341 insertions(+), 283 deletions(-) create mode 100644 cpp/src/io/orc/reader_impl_chunking.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d4ed6c113b9..49c19596d23 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -387,6 +387,7 @@ add_library( src/io/orc/dict_enc.cu src/io/orc/orc.cpp src/io/orc/reader_impl.cu + src/io/orc/reader_impl_chunking.cu src/io/orc/reader_impl_helpers.cpp src/io/orc/reader_impl_preprocess.cu src/io/orc/stats_enc.cu diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu new file mode 100644 index 00000000000..028ea624749 --- /dev/null +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #define PRINT_DEBUG + +#include "reader_impl.hpp" +#include "reader_impl_chunking.hpp" +#include "reader_impl_helpers.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::io::orc::detail { + +namespace { + +/** + * @brief Function that populates column descriptors stream/chunk + */ +std::size_t gather_stream_info(std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool apply_struct_map, + std::vector& stream_info) +{ + uint64_t src_offset = 0; + uint64_t dst_offset = 0; + + for (auto const& stream : stripefooter->streams) { + if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { + dst_offset += stream.length; + continue; + } + + auto const column_id = *stream.column_id; + auto col = orc2gdf[column_id]; + + if (col == -1 and apply_struct_map) { + // A struct-type column has no data itself, but rather child columns + // for each of its fields. There is only a PRESENT stream, which + // needs to be included for the reader. 
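+      // For example (an illustrative schema, not one taken from this patch): with
+      // `struct<a:int, b:string>`, the STRUCT node itself maps to no output column
+      // (orc2gdf[column_id] == -1), yet its PRESENT stream encodes the struct's
+      // null mask, so the code below re-targets `col` to one of the child columns
+      // to make sure that stream is still collected.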
+      auto const schema_type = types[column_id];
+      if (not schema_type.subtypes.empty()) {
+        if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) {
+          for (auto const& idx : schema_type.subtypes) {
+            auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1;
+            if (child_idx >= 0) { col = child_idx; }
+          }
+        }
+      }
+    }
+
+    if (col != -1) {
+      stream_info.emplace_back(stripeinfo->offset + src_offset,
+                               dst_offset,
+                               stream.length,
+                               stripe_index,
+                               level,
+                               column_id,
+                               stream.kind);
+      dst_offset += stream.length;
+    }
+    src_offset += stream.length;
+  }
+
+  return dst_offset;
+}
+
+}  // namespace
+
+void reader::impl::query_stripe_compression_info()
+{
+  if (_file_itm_data->compinfo_ready) { return; }
+  if (_selected_columns.num_levels() == 0) { return; }
+
+  auto const rows_to_skip      = _file_itm_data->rows_to_skip;
+  auto const rows_to_read      = _file_itm_data->rows_to_read;
+  auto const& selected_stripes = _file_itm_data->selected_stripes;
+
+  // If no rows or stripes to read, return empty columns
+  // TODO : remove?
+  if (rows_to_read == 0 || selected_stripes.empty()) { return; }
+
+  auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data;
+  lvl_stripe_data.resize(_selected_columns.num_levels());
+
+  // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter.
+  std::unordered_map
+    stream_compinfo_map;
+
+  // Logically view streams as columns
+  std::vector<orc_stream_info> stream_info;
+  stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size());
+
+  // Iterates through levels of nested columns, child column will be one level down
+  // compared to parent column.
+  auto& col_meta = *_col_meta;
+  for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) {
+    auto& columns_level = _selected_columns.levels[level];
+    // Association between each ORC column and its cudf::column
+    col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1);
+
+    size_type col_id{0};
+    for (auto& col : columns_level) {
+      // Map each ORC column to its column
+      col_meta.orc_col_map[level][col.id] = col_id++;
+    }
+  }
+
+  for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) {
+    // Get the total number of stripes across all input files.
+    std::size_t total_num_stripes =
+      std::accumulate(selected_stripes.begin(),
+                      selected_stripes.end(),
+                      0,
+                      [](std::size_t sum, auto& stripe_source_mapping) {
+                        return sum + stripe_source_mapping.stripe_info.size();
+                      });
+
+    // Tracker for eventually deallocating compressed and uncompressed data
+    auto& stripe_data = lvl_stripe_data[level];
+
+    int stripe_idx = 0;
+
+    std::vector<std::pair<std::future<std::size_t>, std::size_t>> read_tasks;
+    for (auto const& stripe_source_mapping : selected_stripes) {
+      // Iterate through the source files selected stripes
+      for (auto const& stripe : stripe_source_mapping.stripe_info) {
+        auto const stripe_info   = stripe.first;
+        auto const stripe_footer = stripe.second;
+
+        auto stream_count          = stream_info.size();
+        auto const total_data_size = gather_stream_info(stripe_idx,
+                                                        level,
+                                                        stripe_info,
+                                                        stripe_footer,
+                                                        col_meta.orc_col_map[level],
+                                                        _metadata.get_types(),
+                                                        level == 0,
+                                                        stream_info);
+
+        auto const is_stripe_data_empty = total_data_size == 0;
+        CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0,
+                     "Invalid index rowgroup stream data");
+
+        // Buffer needs to be padded.
+        // Required by `copy_uncompressed_kernel`.
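+        // The allocation below is rounded up for padding, and the loop that
+        // follows coalesces streams that sit back-to-back in the file into single
+        // reads. For example (numbers are illustrative only), streams at offsets
+        // [1000,1100), [1100,1400) and [1400,1500) are fetched with one 500-byte
+        // read at offset 1000 instead of three separate calls.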
+        stripe_data.emplace_back(
+          cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream);
+        auto dst_base = static_cast<uint8_t*>(stripe_data.back().data());
+
+        // Coalesce consecutive streams into one read
+        while (not is_stripe_data_empty and stream_count < stream_info.size()) {
+          auto const d_dst  = dst_base + stream_info[stream_count].dst_pos;
+          auto const offset = stream_info[stream_count].offset;
+          auto len          = stream_info[stream_count].length;
+          stream_count++;
+
+          while (stream_count < stream_info.size() &&
+                 stream_info[stream_count].offset == offset + len) {
+            len += stream_info[stream_count].length;
+            stream_count++;
+          }
+          if (_metadata.per_file_metadata[stripe_source_mapping.source_idx]
+                .source->is_device_read_preferred(len)) {
+            read_tasks.push_back(
+              std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx]
+                          .source->device_read_async(offset, len, d_dst, _stream),
+                        len));
+
+          } else {
+            auto const buffer =
+              _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read(
+                offset, len);
+            CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read.");
+            CUDF_CUDA_TRY(
+              cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value()));
+            _stream.synchronize();
+          }
+        }
+
+        stripe_idx++;
+      }
+    }
+    for (auto& task : read_tasks) {
+      CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read.");
+    }
+
+    if (stripe_data.empty()) { continue; }
+
+    // Setup row group descriptors if using indexes
+    if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
+      auto const& decompressor = *_metadata.per_file_metadata[0].decompressor;
+      cudf::detail::hostdevice_vector<gpu::CompressedStreamInfo> compinfo(
+        0, stream_info.size(), _stream);
+
+      for (auto const& info : stream_info) {
+        compinfo.push_back(gpu::CompressedStreamInfo(
+          static_cast<uint8_t const*>(stripe_data[info.stripe_idx].data()) + info.dst_pos,
+          info.length));
+        stream_compinfo_map[stream_id_info{
+          info.stripe_idx, info.level, info.orc_col_idx, info.kind}] =
+          &compinfo[compinfo.size() - 1];
+#ifdef PRINT_DEBUG
+        printf("collect stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n",
+               (int)info.stripe_idx,
+               (int)info.level,
+               (int)info.orc_col_idx,
+               (int)info.kind,
+               info.dst_pos,
+               info.length);
+        fflush(stdout);
+#endif
+      }
+
+      compinfo.host_to_device_async(_stream);
+
+      gpu::ParseCompressedStripeData(compinfo.device_ptr(),
+                                     compinfo.size(),
+                                     decompressor.GetBlockSize(),
+                                     decompressor.GetLog2MaxCompressionRatio(),
+                                     _stream);
+      compinfo.device_to_host_sync(_stream);
+
+      auto& compinfo_map = _file_itm_data->compinfo_map;
+      for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) {
+        compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks,
+                                   stream_compinfo->num_uncompressed_blocks,
+                                   stream_compinfo->max_uncompressed_size};
+#ifdef PRINT_DEBUG
+        printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n",
+               (int)stream_id.stripe_idx,
+               (int)stream_id.level,
+               (int)stream_id.orc_col_idx,
+               (int)stream_id.kind,
+               (size_t)stream_compinfo->num_compressed_blocks,
+               (size_t)stream_compinfo->num_uncompressed_blocks,
+               stream_compinfo->max_uncompressed_size);
+        fflush(stdout);
+#endif
+      }
+
+      // Must clear so we will not overwrite the old compression info stream_id.
+      stream_info.clear();
+      stream_compinfo_map.clear();
+
+    } else {
+      // printf("no compression \n");
+      // fflush(stdout);
+
+      // Set decompressed data size equal to the input size.
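+      // A minimal sketch of what this branch could cache (hypothetical code, not
+      // part of the original commit): with no compression, each stream's raw
+      // length already bounds its "decompressed" size, e.g.
+      //
+      //   for (auto const& info : stream_info) {
+      //     compinfo_map[stream_id_info{
+      //       info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = {
+      //       0,             // num_compressed_blocks
+      //       0,             // num_uncompressed_blocks
+      //       info.length};  // max_uncompressed_size
+      //   }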
+      // TODO
+    }
+
+    // printf(" end level %d\n\n", (int)level);
+
+  }  // end loop level
+
+  // lvl_stripe_data.clear();
+  _file_itm_data->compinfo_ready = true;
+}
+
+}  // namespace cudf::io::orc::detail
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index c90f606da5a..c8743001928 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -30,6 +30,42 @@
 
 namespace cudf::io::orc::detail {
 
+/**
+ * @brief Struct that maps ORC streams to columns
+ */
+struct orc_stream_info {
+  explicit orc_stream_info(uint64_t offset_,
+                           std::size_t dst_pos_,
+                           uint32_t length_,
+                           uint32_t stripe_idx_,
+                           std::size_t level_,
+                           uint32_t orc_col_idx_,
+                           StreamKind kind_)
+    : offset(offset_),
+      dst_pos(dst_pos_),
+      length(length_),
+      stripe_idx(stripe_idx_),
+      level(level_),
+      orc_col_idx(orc_col_idx_),
+      kind(kind_)
+  {
+#ifdef PRINT_DEBUG
+    printf(" construct stripe id [%d, %d, %d, %d]\n",
+           (int)stripe_idx,
+           (int)level,
+           (int)orc_col_idx,
+           (int)kind);
+#endif
+  }
+  uint64_t offset;      // offset in file
+  std::size_t dst_pos;  // offset in memory relative to start of compressed stripe data
+  std::size_t length;   // length in file
+  uint32_t stripe_idx;  // stripe processing index, not stripe index in source
+  std::size_t level;    // TODO
+  uint32_t orc_col_idx;
+  StreamKind kind;
+};
+
 // unify this with orc_stream_info
 struct stream_id_info {
   std::size_t stripe_idx;
diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu
index 4f9dac15d98..c40b22e0b93 100644
--- a/cpp/src/io/orc/reader_impl_preprocess.cu
+++ b/cpp/src/io/orc/reader_impl_preprocess.cu
@@ -52,97 +52,6 @@ namespace cudf::io::orc::detail {
 
 namespace {
 
-/**
- * @brief Struct that maps ORC streams to columns
- */
-struct orc_stream_info {
-  explicit orc_stream_info(uint64_t offset_,
-                           std::size_t dst_pos_,
-                           uint32_t length_,
-                           uint32_t stripe_idx_,
-                           std::size_t level_,
-                           uint32_t orc_col_idx_,
-                           StreamKind kind_)
-    : offset(offset_),
-      dst_pos(dst_pos_),
-      length(length_),
-      stripe_idx(stripe_idx_),
-      level(level_),
-      orc_col_idx(orc_col_idx_),
-      kind(kind_)
-  {
-#ifdef PRINT_DEBUG
-    printf(" construct stripe id [%d, %d, %d, %d]\n",
-           (int)stripe_idx,
-           (int)level,
-           (int)orc_col_idx,
-           (int)kind);
-#endif
-  }
-  uint64_t offset;      // offset in file
-  std::size_t dst_pos;  // offset in memory relative to start of compressed stripe data
-  std::size_t length;   // length in file
-  uint32_t stripe_idx;  // stripe processing index, not stripe index in source
-  std::size_t level;    // TODO
-  uint32_t orc_col_idx;
-  StreamKind kind;
-};
-
-/**
- * @brief Function that populates column descriptors stream/chunk
- */
-std::size_t gather_stream_info(std::size_t stripe_index,
-                               std::size_t level,
-                               orc::StripeInformation const* stripeinfo,
-                               orc::StripeFooter const* stripefooter,
-                               host_span<int const> orc2gdf,
-                               host_span<orc::SchemaType const> types,
-                               bool apply_struct_map,
-                               std::vector<orc_stream_info>& stream_info)
-{
-  uint64_t src_offset = 0;
-  uint64_t dst_offset = 0;
-
-  for (auto const& stream : stripefooter->streams) {
-    if (!stream.column_id || *stream.column_id >= orc2gdf.size()) {
-      dst_offset += stream.length;
-      continue;
-    }
-
-    auto const column_id = *stream.column_id;
-    auto col             = orc2gdf[column_id];
-
-    if (col == -1 and apply_struct_map) {
-      // A struct-type column has no data itself, but rather child columns
-      // for each of its fields. There is only a PRESENT stream, which
-      // needs to be included for the reader.
- auto const schema_type = types[column_id]; - if (not schema_type.subtypes.empty()) { - if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - for (auto const& idx : schema_type.subtypes) { - auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; - if (child_idx >= 0) { col = child_idx; } - } - } - } - } - - if (col != -1) { - stream_info.emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stripe_index, - level, - column_id, - stream.kind); - dst_offset += stream.length; - } - src_offset += stream.length; - } - - return dst_offset; -} - /** * @brief Function that populates column descriptors stream/chunk */ @@ -839,198 +748,6 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace -void reader::impl::query_stripe_compression_info() -{ - if (_file_itm_data->compinfo_ready) { return; } - if (_selected_columns.num_levels() == 0) { return; } - - auto const rows_to_skip = _file_itm_data->rows_to_skip; - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; - - // If no rows or stripes to read, return empty columns - // TODO : remove? - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - lvl_stripe_data.resize(_selected_columns.num_levels()); - - // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - std::unordered_map - stream_compinfo_map; - - // Logically view streams as columns - std::vector stream_info; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); - - // Iterates through levels of nested columns, child column will be one level down - // compared to parent column. - auto& col_meta = *_col_meta; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& columns_level = _selected_columns.levels[level]; - // Association between each ORC column and its cudf::column - col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); - - size_type col_id{0}; - for (auto& col : columns_level) { - // Map each ORC column to its column - col_meta.orc_col_map[level][col.id] = col_id++; - } - } - - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); - - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; - - int stripe_idx = 0; - - std::vector, std::size_t>> read_tasks; - for (auto const& stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (auto const& stripe : stripe_source_mapping.stripe_info) { - auto const stripe_info = stripe.first; - auto const stripe_footer = stripe.second; - - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - level == 0, - stream_info); - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. 
- stripe_data.emplace_back( - cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); - auto dst_base = static_cast(stripe_data.back().data()); - - // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; - stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; - stream_count++; - } - if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, _stream), - len)); - - } else { - auto const buffer = - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); - } - } - - stripe_idx++; - } - } - for (auto& task : read_tasks) { - CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); - } - - if (stripe_data.empty()) { continue; } - - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), _stream); - - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = - &compinfo[compinfo.size() - 1]; -#ifdef PRINT_DEBUG - printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, - info.dst_pos, - info.length); - fflush(stdout); -#endif - } - - compinfo.host_to_device_async(_stream); - - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - _stream); - compinfo.device_to_host_sync(_stream); - - auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; -#ifdef PRINT_DEBUG - printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.stripe_idx, - (int)stream_id.level, - (int)stream_id.orc_col_idx, - (int)stream_id.kind, - (size_t)stream_compinfo->num_compressed_blocks, - (size_t)stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size); - fflush(stdout); -#endif - } - - // Must clear so we will not overwrite the old compression info stream_id. - stream_info.clear(); - stream_compinfo_map.clear(); - - } else { - // printf("no compression \n"); - // fflush(stdout); - - // Set decompressed data size equal to the input size. 
- // TODO - } - - // printf(" end level %d\n\n", (int)level); - - } // end loop level - - // lvl_stripe_data.clear(); - _file_itm_data->compinfo_ready = true; -} - void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) From 40af354fdf518418f110aeda7e651bbb7844339b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 15:34:27 -0800 Subject: [PATCH 025/321] Cache stream_info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 5 +++ cpp/src/io/orc/reader_impl_chunking.cu | 7 ++-- cpp/src/io/orc/reader_impl_chunking.hpp | 2 + cpp/src/io/orc/reader_impl_preprocess.cu | 51 ++++++++++++++++-------- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b0869125fe9..8130ac51f6d 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -77,6 +77,11 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); + /** + * + */ + void create_pass_data(); + /** * @brief Compute stripe sizes. */ diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 028ea624749..5590f53858c 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -130,8 +130,7 @@ void reader::impl::query_stripe_compression_info() stream_compinfo_map; // Logically view streams as columns - std::vector stream_info; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + _file_itm_data->lvl_stream_info.resize(_selected_columns.num_levels()); // Iterates through levels of nested columns, child column will be one level down // compared to parent column. @@ -149,6 +148,9 @@ void reader::impl::query_stripe_compression_info() } for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + // Get the total number of stripes across all input files. std::size_t total_num_stripes = std::accumulate(selected_stripes.begin(), @@ -282,7 +284,6 @@ void reader::impl::query_stripe_compression_info() } // Must clear so we will not overwrite the old compression info stream_id. 
- stream_info.clear(); stream_compinfo_map.clear(); } else { diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index c8743001928..83954a7dd1b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -106,6 +106,8 @@ struct file_intermediate_data { std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; + std::vector> lvl_stream_info; + int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index c40b22e0b93..44b3a138c69 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -65,7 +65,7 @@ std::size_t gather_stream_info_and_update_chunks( bool use_index, bool apply_struct_map, std::size_t* num_dictionary_entries, - std::vector& stream_info, + std::size_t* stream_idx, cudf::detail::hostdevice_2dvector& chunks) { uint64_t src_offset = 0; @@ -106,7 +106,7 @@ std::size_t gather_stream_info_and_update_chunks( if (child_idx >= 0) { col = child_idx; auto& chunk = chunks[stripe_index][col]; - chunk.strm_id[gpu::CI_PRESENT] = stream_info.size(); + chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } } @@ -118,7 +118,7 @@ std::size_t gather_stream_info_and_update_chunks( auto& chunk = chunks[stripe_index][col]; auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - chunk.strm_id[index_type] = stream_info.size(); + chunk.strm_id[index_type] = *stream_idx; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams chunk.skip_count |= 1 << index_type; @@ -131,13 +131,7 @@ std::size_t gather_stream_info_and_update_chunks( } } - stream_info.emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stripe_index, - level, - column_id, - stream.kind); + (*stream_idx)++; dst_offset += stream.length; } src_offset += stream.length; @@ -165,7 +159,7 @@ rmm::device_buffer decompress_stripe_data( compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, - host_span stream_info, + host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, cudf::detail::hostdevice_2dvector& row_groups, std::size_t num_stripes, @@ -288,7 +282,7 @@ rmm::device_buffer decompress_stripe_data( compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; - stream_info[i].dst_pos = decomp_offset; + // stream_info[i].dst_pos = decomp_offset; decomp_offset += compinfo[i].max_uncompressed_size; start_pos += compinfo[i].num_compressed_blocks; start_pos_uncomp += compinfo[i].num_uncompressed_blocks; @@ -748,6 +742,31 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace +void reader::impl::create_pass_data() +{ + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + lvl_stripe_data.resize(_selected_columns.num_levels()); + + auto const& selected_stripes = _file_itm_data->selected_stripes; + + // Logically view streams as columns + std::vector stream_info; + stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + + auto& col_meta = *_col_meta; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& columns_level = _selected_columns.levels[level]; + // Association between each ORC column and its cudf::column + 
col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); + + size_type col_id{0}; + for (auto& col : columns_level) { + // Map each ORC column to its column + col_meta.orc_col_map[level][col.id] = col_id++; + } + } +} + void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) @@ -856,7 +875,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, (rows_to_skip == 0); // Logically view streams as columns - std::vector stream_info; + auto const& stream_info = _file_itm_data->lvl_stream_info[level]; null_count_prefix_sums.emplace_back(); null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); @@ -873,7 +892,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, std::size_t stripe_start_row = 0; std::size_t num_dict_entries = 0; std::size_t num_rowgroups = 0; - int stripe_idx = 0; + std::size_t stripe_idx = 0; + std::size_t stream_idx = 0; // std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -882,7 +902,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto stream_count = stream_info.size(); auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, level, @@ -893,7 +912,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, use_index, level == 0, &num_dict_entries, - stream_info, + &stream_idx, chunks); auto const is_stripe_data_empty = total_data_size == 0; From 10a598eeec539c9100dfd4bb2a9e28fc3b67ba43 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 16:15:55 -0800 Subject: [PATCH 026/321] Cache everything Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 135 ++++++++++++++++---- cpp/src/io/orc/reader_impl_chunking.hpp | 4 +- cpp/src/io/orc/reader_impl_preprocess.cu | 155 ++++++----------------- 3 files changed, 151 insertions(+), 143 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5590f53858c..19c76ada8f9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -55,18 +55,36 @@ namespace { /** * @brief Function that populates column descriptors stream/chunk */ -std::size_t gather_stream_info(std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - host_span types, - bool apply_struct_map, - std::vector& stream_info) +std::size_t gather_stream_info_and_update_chunks( + std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool use_index, + bool apply_struct_map, + std::size_t* num_dictionary_entries, + std::vector& stream_info, + cudf::detail::hostdevice_2dvector& chunks) { uint64_t src_offset = 0; uint64_t dst_offset = 0; + auto const get_stream_index_type = [](orc::StreamKind kind) { + switch (kind) { + case orc::DATA: return gpu::CI_DATA; + case orc::LENGTH: + case orc::SECONDARY: return gpu::CI_DATA2; + case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; + case orc::PRESENT: return gpu::CI_PRESENT; + case orc::ROW_INDEX: return gpu::CI_INDEX; + default: + // Skip this stream as it's not strictly required + return gpu::CI_NUM_STREAMS; + } + }; + for (auto const& stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { dst_offset += 
stream.length; @@ -85,13 +103,34 @@ std::size_t gather_stream_info(std::size_t stripe_index, if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { for (auto const& idx : schema_type.subtypes) { auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; - if (child_idx >= 0) { col = child_idx; } + if (child_idx >= 0) { + col = child_idx; + auto& chunk = chunks[stripe_index][col]; + chunk.strm_id[gpu::CI_PRESENT] = stream_info.size(); + chunk.strm_len[gpu::CI_PRESENT] = stream.length; + } } } } } - if (col != -1) { + if (src_offset >= stripeinfo->indexLength || use_index) { + auto& chunk = chunks[stripe_index][col]; + auto const index_type = get_stream_index_type(stream.kind); + if (index_type < gpu::CI_NUM_STREAMS) { + chunk.strm_id[index_type] = stream_info.size(); + chunk.strm_len[index_type] = stream.length; + // NOTE: skip_count field is temporarily used to track the presence of index streams + chunk.skip_count |= 1 << index_type; + + if (index_type == gpu::CI_DICTIONARY) { + chunk.dictionary_start = *num_dictionary_entries; + chunk.dict_len = stripefooter->columns[column_id].dictionarySize; + *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; + } + } + } + stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -122,8 +161,15 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_data_chunks = _file_itm_data->lvl_data_chunks; + auto& lvl_num_dict_entries = _file_itm_data->lvl_num_dict_entries; + auto& lvl_stripe_is_empty = _file_itm_data->lvl_stripe_is_empty; + lvl_stripe_data.resize(_selected_columns.num_levels()); + lvl_data_chunks.resize(_selected_columns.num_levels()); + lvl_num_dict_entries.resize(_selected_columns.num_levels()); + lvl_stripe_is_empty.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -148,9 +194,6 @@ void reader::impl::query_stripe_compression_info() } for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stream_info = _file_itm_data->lvl_stream_info[level]; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); - // Get the total number of stripes across all input files. 
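// For example (illustrative numbers), two input files contributing 3 and 5
// selected stripes give total_num_stripes == 8 below, and the per-level chunk
// descriptors are then allocated as an 8 x num_columns table.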
std::size_t total_num_stripes = std::accumulate(selected_stripes.begin(), @@ -159,11 +202,37 @@ void reader::impl::query_stripe_compression_info() [](std::size_t sum, auto& stripe_source_mapping) { return sum + stripe_source_mapping.stripe_info.size(); }); + auto& columns_level = _selected_columns.levels[level]; + auto const num_columns = columns_level.size(); + _file_itm_data->lvl_data_chunks[level] = + cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); + auto& chunks = _file_itm_data->lvl_data_chunks[level]; + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size() * + num_columns); + + const bool use_index = + _use_index && + // Do stripes have row group index + _metadata.is_row_grp_idx_present() && + // Only use if we don't have much work with complete columns & stripes + // TODO: Consider nrows, gpu, and tune the threshold + (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && + _metadata.get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && + // Only use if first row is aligned to a stripe boundary + // TODO: Fix logic to handle unaligned rows + (rows_to_skip == 0); // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; - int stripe_idx = 0; + lvl_stripe_is_empty[level].reserve(selected_stripes.size() * + selected_stripes.front().stripe_info.size()); + + std::size_t num_dict_entries = 0; + std::size_t stripe_idx = 0; std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -172,15 +241,30 @@ void reader::impl::query_stripe_compression_info() auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - level == 0, - stream_info); + auto stream_count = stream_info.size(); + auto const total_data_size = + gather_stream_info_and_update_chunks(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + stream_info, + chunks); + + lvl_stripe_is_empty[level].push_back(total_data_size == 0); + + // auto const total_data_size = gather_stream_info(stripe_idx, + // level, + // stripe_info, + // stripe_footer, + // col_meta.orc_col_map[level], + // _metadata.get_types(), + // level == 0, + // stream_info); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -225,12 +309,15 @@ void reader::impl::query_stripe_compression_info() stripe_idx++; } } + for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } if (stripe_data.empty()) { continue; } + lvl_num_dict_entries[level] = num_dict_entries; + // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 83954a7dd1b..fab218f0fde 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ 
b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -105,8 +105,10 @@ struct file_intermediate_data { std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; - + std::vector> lvl_data_chunks; std::vector> lvl_stream_info; + std::vector lvl_num_dict_entries; + std::vector> lvl_stripe_is_empty; int64_t rows_to_skip; size_type rows_to_read; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 44b3a138c69..328eb27b781 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -52,94 +52,6 @@ namespace cudf::io::orc::detail { namespace { -/** - * @brief Function that populates column descriptors stream/chunk - */ -std::size_t gather_stream_info_and_update_chunks( - std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - host_span types, - bool use_index, - bool apply_struct_map, - std::size_t* num_dictionary_entries, - std::size_t* stream_idx, - cudf::detail::hostdevice_2dvector& chunks) -{ - uint64_t src_offset = 0; - uint64_t dst_offset = 0; - - auto const get_stream_index_type = [](orc::StreamKind kind) { - switch (kind) { - case orc::DATA: return gpu::CI_DATA; - case orc::LENGTH: - case orc::SECONDARY: return gpu::CI_DATA2; - case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; - case orc::PRESENT: return gpu::CI_PRESENT; - case orc::ROW_INDEX: return gpu::CI_INDEX; - default: - // Skip this stream as it's not strictly required - return gpu::CI_NUM_STREAMS; - } - }; - - for (auto const& stream : stripefooter->streams) { - if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { - dst_offset += stream.length; - continue; - } - - auto const column_id = *stream.column_id; - auto col = orc2gdf[column_id]; - - if (col == -1 and apply_struct_map) { - // A struct-type column has no data itself, but rather child columns - // for each of its fields. There is only a PRESENT stream, which - // needs to be included for the reader. - auto const schema_type = types[column_id]; - if (not schema_type.subtypes.empty()) { - if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - for (auto const& idx : schema_type.subtypes) { - auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; - if (child_idx >= 0) { - col = child_idx; - auto& chunk = chunks[stripe_index][col]; - chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; - chunk.strm_len[gpu::CI_PRESENT] = stream.length; - } - } - } - } - } - if (col != -1) { - if (src_offset >= stripeinfo->indexLength || use_index) { - auto& chunk = chunks[stripe_index][col]; - auto const index_type = get_stream_index_type(stream.kind); - if (index_type < gpu::CI_NUM_STREAMS) { - chunk.strm_id[index_type] = *stream_idx; - chunk.strm_len[index_type] = stream.length; - // NOTE: skip_count field is temporarily used to track the presence of index streams - chunk.skip_count |= 1 << index_type; - - if (index_type == gpu::CI_DICTIONARY) { - chunk.dictionary_start = *num_dictionary_entries; - chunk.dict_len = stripefooter->columns[column_id].dictionarySize; - *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; - } - } - } - - (*stream_idx)++; - dst_offset += stream.length; - } - src_offset += stream.length; - } - - return dst_offset; -} - /** * @brief Decompresses the stripe data, at stream granularity. 
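* As far as the surrounding changes show, the per-stream block counts cached in
* `compinfo_map` by query_stripe_compression_info() are looked up here by
* {stripe_idx, level, orc_col_idx, kind}, so the compressed streams do not need
* to be parsed a second time to size the decompression scratch buffer.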
* @@ -858,9 +770,10 @@ void reader::impl::prepare_data(uint64_t skip_rows, return sum + stripe_source_mapping.stripe_info.size(); }); auto const num_columns = columns_level.size(); - cudf::detail::hostdevice_2dvector chunks( - total_num_stripes, num_columns, _stream); - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + + auto& lvl_chunks = _file_itm_data->lvl_data_chunks[level]; + auto& lvl_num_dict_entries = _file_itm_data->lvl_num_dict_entries; + auto& lvl_stripe_is_empty = _file_itm_data->lvl_stripe_is_empty[level]; const bool use_index = _use_index && @@ -890,10 +803,11 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto& stripe_data = lvl_stripe_data[level]; std::size_t stripe_start_row = 0; - std::size_t num_dict_entries = 0; - std::size_t num_rowgroups = 0; - std::size_t stripe_idx = 0; - std::size_t stream_idx = 0; + // std::size_t num_dict_entries = 0; + auto const num_dict_entries = lvl_num_dict_entries[level]; + std::size_t num_rowgroups = 0; + std::size_t stripe_idx = 0; + // std::size_t stream_idx = 0; // std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -902,20 +816,20 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto const total_data_size = - gather_stream_info_and_update_chunks(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - &stream_idx, - chunks); - - auto const is_stripe_data_empty = total_data_size == 0; + // auto const total_data_size = + // gather_stream_info_and_update_chunks(stripe_idx, + // level, + // stripe_info, + // stripe_footer, + // col_meta.orc_col_map[level], + // _metadata.get_types(), + // use_index, + // level == 0, + // &num_dict_entries, + // &stream_idx, + // chunks); + + auto const is_stripe_data_empty = lvl_stripe_is_empty[stripe_idx]; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); @@ -930,7 +844,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Update chunks to reference streams pointers for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx][col_idx]; + auto& chunk = lvl_chunks[stripe_idx][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting chunk.start_row = (level == 0) @@ -1010,7 +924,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, - chunks, + lvl_chunks, row_groups, total_num_stripes, _metadata.get_row_index_stride(), @@ -1020,12 +934,12 @@ void reader::impl::prepare_data(uint64_t skip_rows, stripe_data.push_back(std::move(decomp_data)); } else { if (row_groups.size().first) { - chunks.host_to_device_async(_stream); + lvl_chunks.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), nullptr, - chunks.base_device_ptr(), + lvl_chunks.base_device_ptr(), num_columns, total_num_stripes, num_rowgroups, @@ -1038,7 +952,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < total_num_stripes; ++j) { - if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { + if 
(lvl_chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { is_nullable = true; break; } @@ -1055,7 +969,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, _metadata.get_row_index_stride(), level, tz_table->view(), - chunks, + lvl_chunks, row_groups, _out_buffers[level], _stream, @@ -1063,11 +977,16 @@ void reader::impl::prepare_data(uint64_t skip_rows, if (nested_cols.size()) { // Extract information to process nested child columns - scan_null_counts(chunks, null_count_prefix_sums[level], _stream); + scan_null_counts(lvl_chunks, null_count_prefix_sums[level], _stream); row_groups.device_to_host_sync(_stream); - aggregate_child_meta( - level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); + aggregate_child_meta(level, + _selected_columns, + lvl_chunks, + row_groups, + nested_cols, + _out_buffers[level], + col_meta); // ORC stores number of elements at each row, so we need to generate offsets from that std::vector buff_data; From 80d41db68feebcd53bbc1ea48073977e539f6324 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 18:15:01 -0800 Subject: [PATCH 027/321] Revert "Cache everything" This reverts commit 10a598eeec539c9100dfd4bb2a9e28fc3b67ba43. --- cpp/src/io/orc/reader_impl_chunking.cu | 135 ++++---------------- cpp/src/io/orc/reader_impl_chunking.hpp | 4 +- cpp/src/io/orc/reader_impl_preprocess.cu | 155 +++++++++++++++++------ 3 files changed, 143 insertions(+), 151 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 19c76ada8f9..5590f53858c 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -55,36 +55,18 @@ namespace { /** * @brief Function that populates column descriptors stream/chunk */ -std::size_t gather_stream_info_and_update_chunks( - std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - host_span types, - bool use_index, - bool apply_struct_map, - std::size_t* num_dictionary_entries, - std::vector& stream_info, - cudf::detail::hostdevice_2dvector& chunks) +std::size_t gather_stream_info(std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool apply_struct_map, + std::vector& stream_info) { uint64_t src_offset = 0; uint64_t dst_offset = 0; - auto const get_stream_index_type = [](orc::StreamKind kind) { - switch (kind) { - case orc::DATA: return gpu::CI_DATA; - case orc::LENGTH: - case orc::SECONDARY: return gpu::CI_DATA2; - case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; - case orc::PRESENT: return gpu::CI_PRESENT; - case orc::ROW_INDEX: return gpu::CI_INDEX; - default: - // Skip this stream as it's not strictly required - return gpu::CI_NUM_STREAMS; - } - }; - for (auto const& stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { dst_offset += stream.length; @@ -103,34 +85,13 @@ std::size_t gather_stream_info_and_update_chunks( if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { for (auto const& idx : schema_type.subtypes) { auto child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; - if (child_idx >= 0) { - col = child_idx; - auto& chunk = chunks[stripe_index][col]; - chunk.strm_id[gpu::CI_PRESENT] = stream_info.size(); - chunk.strm_len[gpu::CI_PRESENT] = stream.length; - } + if (child_idx >= 0) { col = child_idx; } } } } } - if (col != -1) { - if (src_offset >= stripeinfo->indexLength || use_index) { - auto& chunk = chunks[stripe_index][col]; - auto const index_type = get_stream_index_type(stream.kind); - if (index_type < gpu::CI_NUM_STREAMS) { - chunk.strm_id[index_type] = stream_info.size(); - chunk.strm_len[index_type] = stream.length; - // NOTE: skip_count field is temporarily used to track the presence of index streams - chunk.skip_count |= 1 << index_type; - - if (index_type == gpu::CI_DICTIONARY) { - chunk.dictionary_start = *num_dictionary_entries; - chunk.dict_len = stripefooter->columns[column_id].dictionarySize; - *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; - } - } - } + if (col != -1) { stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, @@ -161,15 +122,8 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_data_chunks = _file_itm_data->lvl_data_chunks; - auto& lvl_num_dict_entries = _file_itm_data->lvl_num_dict_entries; - auto& lvl_stripe_is_empty = _file_itm_data->lvl_stripe_is_empty; - + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; lvl_stripe_data.resize(_selected_columns.num_levels()); - lvl_data_chunks.resize(_selected_columns.num_levels()); - lvl_num_dict_entries.resize(_selected_columns.num_levels()); - lvl_stripe_is_empty.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -194,6 +148,9 @@ void reader::impl::query_stripe_compression_info() } for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + // Get the total number of stripes across all input files. 
std::size_t total_num_stripes = std::accumulate(selected_stripes.begin(), @@ -202,37 +159,11 @@ void reader::impl::query_stripe_compression_info() [](std::size_t sum, auto& stripe_source_mapping) { return sum + stripe_source_mapping.stripe_info.size(); }); - auto& columns_level = _selected_columns.levels[level]; - auto const num_columns = columns_level.size(); - _file_itm_data->lvl_data_chunks[level] = - cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); - auto& chunks = _file_itm_data->lvl_data_chunks[level]; - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - - auto& stream_info = _file_itm_data->lvl_stream_info[level]; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size() * - num_columns); - - const bool use_index = - _use_index && - // Do stripes have row group index - _metadata.is_row_grp_idx_present() && - // Only use if we don't have much work with complete columns & stripes - // TODO: Consider nrows, gpu, and tune the threshold - (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && - // Only use if first row is aligned to a stripe boundary - // TODO: Fix logic to handle unaligned rows - (rows_to_skip == 0); // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; - lvl_stripe_is_empty[level].reserve(selected_stripes.size() * - selected_stripes.front().stripe_info.size()); - - std::size_t num_dict_entries = 0; - std::size_t stripe_idx = 0; + int stripe_idx = 0; std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -241,30 +172,15 @@ void reader::impl::query_stripe_compression_info() auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - auto stream_count = stream_info.size(); - auto const total_data_size = - gather_stream_info_and_update_chunks(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - stream_info, - chunks); - - lvl_stripe_is_empty[level].push_back(total_data_size == 0); - - // auto const total_data_size = gather_stream_info(stripe_idx, - // level, - // stripe_info, - // stripe_footer, - // col_meta.orc_col_map[level], - // _metadata.get_types(), - // level == 0, - // stream_info); + auto stream_count = stream_info.size(); + auto const total_data_size = gather_stream_info(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + level == 0, + stream_info); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -309,15 +225,12 @@ void reader::impl::query_stripe_compression_info() stripe_idx++; } } - for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } if (stripe_data.empty()) { continue; } - lvl_num_dict_entries[level] = num_dict_entries; - // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index fab218f0fde..83954a7dd1b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ 
b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -105,10 +105,8 @@ struct file_intermediate_data { std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; - std::vector> lvl_data_chunks; + std::vector> lvl_stream_info; - std::vector lvl_num_dict_entries; - std::vector> lvl_stripe_is_empty; int64_t rows_to_skip; size_type rows_to_read; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 328eb27b781..44b3a138c69 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -52,6 +52,94 @@ namespace cudf::io::orc::detail { namespace { +/** + * @brief Function that populates column descriptors stream/chunk + */ +std::size_t gather_stream_info_and_update_chunks( + std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool use_index, + bool apply_struct_map, + std::size_t* num_dictionary_entries, + std::size_t* stream_idx, + cudf::detail::hostdevice_2dvector& chunks) +{ + uint64_t src_offset = 0; + uint64_t dst_offset = 0; + + auto const get_stream_index_type = [](orc::StreamKind kind) { + switch (kind) { + case orc::DATA: return gpu::CI_DATA; + case orc::LENGTH: + case orc::SECONDARY: return gpu::CI_DATA2; + case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; + case orc::PRESENT: return gpu::CI_PRESENT; + case orc::ROW_INDEX: return gpu::CI_INDEX; + default: + // Skip this stream as it's not strictly required + return gpu::CI_NUM_STREAMS; + } + }; + + for (auto const& stream : stripefooter->streams) { + if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { + dst_offset += stream.length; + continue; + } + + auto const column_id = *stream.column_id; + auto col = orc2gdf[column_id]; + + if (col == -1 and apply_struct_map) { + // A struct-type column has no data itself, but rather child columns + // for each of its fields. There is only a PRESENT stream, which + // needs to be included for the reader. + auto const schema_type = types[column_id]; + if (not schema_type.subtypes.empty()) { + if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { + for (auto const& idx : schema_type.subtypes) { + auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; + if (child_idx >= 0) { + col = child_idx; + auto& chunk = chunks[stripe_index][col]; + chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; + chunk.strm_len[gpu::CI_PRESENT] = stream.length; + } + } + } + } + } + if (col != -1) { + if (src_offset >= stripeinfo->indexLength || use_index) { + auto& chunk = chunks[stripe_index][col]; + auto const index_type = get_stream_index_type(stream.kind); + if (index_type < gpu::CI_NUM_STREAMS) { + chunk.strm_id[index_type] = *stream_idx; + chunk.strm_len[index_type] = stream.length; + // NOTE: skip_count field is temporarily used to track the presence of index streams + chunk.skip_count |= 1 << index_type; + + if (index_type == gpu::CI_DICTIONARY) { + chunk.dictionary_start = *num_dictionary_entries; + chunk.dict_len = stripefooter->columns[column_id].dictionarySize; + *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; + } + } + } + + (*stream_idx)++; + dst_offset += stream.length; + } + src_offset += stream.length; + } + + return dst_offset; +} + /** * @brief Decompresses the stripe data, at stream granularity. 
* @@ -770,10 +858,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, return sum + stripe_source_mapping.stripe_info.size(); }); auto const num_columns = columns_level.size(); - - auto& lvl_chunks = _file_itm_data->lvl_data_chunks[level]; - auto& lvl_num_dict_entries = _file_itm_data->lvl_num_dict_entries; - auto& lvl_stripe_is_empty = _file_itm_data->lvl_stripe_is_empty[level]; + cudf::detail::hostdevice_2dvector chunks( + total_num_stripes, num_columns, _stream); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = _use_index && @@ -803,11 +890,10 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto& stripe_data = lvl_stripe_data[level]; std::size_t stripe_start_row = 0; - // std::size_t num_dict_entries = 0; - auto const num_dict_entries = lvl_num_dict_entries[level]; - std::size_t num_rowgroups = 0; - std::size_t stripe_idx = 0; - // std::size_t stream_idx = 0; + std::size_t num_dict_entries = 0; + std::size_t num_rowgroups = 0; + std::size_t stripe_idx = 0; + std::size_t stream_idx = 0; // std::vector, std::size_t>> read_tasks; for (auto const& stripe_source_mapping : selected_stripes) { @@ -816,20 +902,20 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const stripe_info = stripe.first; auto const stripe_footer = stripe.second; - // auto const total_data_size = - // gather_stream_info_and_update_chunks(stripe_idx, - // level, - // stripe_info, - // stripe_footer, - // col_meta.orc_col_map[level], - // _metadata.get_types(), - // use_index, - // level == 0, - // &num_dict_entries, - // &stream_idx, - // chunks); - - auto const is_stripe_data_empty = lvl_stripe_is_empty[stripe_idx]; + auto const total_data_size = + gather_stream_info_and_update_chunks(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + &stream_idx, + chunks); + + auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); @@ -844,7 +930,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Update chunks to reference streams pointers for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = lvl_chunks[stripe_idx][col_idx]; + auto& chunk = chunks[stripe_idx][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting chunk.start_row = (level == 0) @@ -924,7 +1010,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, - lvl_chunks, + chunks, row_groups, total_num_stripes, _metadata.get_row_index_stride(), @@ -934,12 +1020,12 @@ void reader::impl::prepare_data(uint64_t skip_rows, stripe_data.push_back(std::move(decomp_data)); } else { if (row_groups.size().first) { - lvl_chunks.host_to_device_async(_stream); + chunks.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), nullptr, - lvl_chunks.base_device_ptr(), + chunks.base_device_ptr(), num_columns, total_num_stripes, num_rowgroups, @@ -952,7 +1038,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < total_num_stripes; ++j) { - if (lvl_chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { + if 
(chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { is_nullable = true; break; } @@ -969,7 +1055,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, _metadata.get_row_index_stride(), level, tz_table->view(), - lvl_chunks, + chunks, row_groups, _out_buffers[level], _stream, @@ -977,16 +1063,11 @@ void reader::impl::prepare_data(uint64_t skip_rows, if (nested_cols.size()) { // Extract information to process nested child columns - scan_null_counts(lvl_chunks, null_count_prefix_sums[level], _stream); + scan_null_counts(chunks, null_count_prefix_sums[level], _stream); row_groups.device_to_host_sync(_stream); - aggregate_child_meta(level, - _selected_columns, - lvl_chunks, - row_groups, - nested_cols, - _out_buffers[level], - col_meta); + aggregate_child_meta( + level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); // ORC stores number of elements at each row, so we need to generate offsets from that std::vector buff_data; From 2bbe9eef190447ecb4bb6868ad21b21700ff0e21 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 20:26:01 -0800 Subject: [PATCH 028/321] Rewrite stripe selection Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 44 ++++-- cpp/src/io/orc/aggregate_orc_metadata.hpp | 12 +- cpp/src/io/orc/orc.hpp | 8 +- cpp/src/io/orc/reader_impl_chunking.cu | 119 +++++++-------- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- cpp/src/io/orc/reader_impl_preprocess.cu | 173 ++++++++++------------ 6 files changed, 177 insertions(+), 181 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 02bf74e9c01..6be812d4604 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -152,7 +152,7 @@ aggregate_orc_metadata::aggregate_orc_metadata( } } -std::tuple> +std::tuple> aggregate_orc_metadata::select_stripes( std::vector> const& user_specified_stripes, uint64_t skip_rows, @@ -167,7 +167,16 @@ aggregate_orc_metadata::select_stripes( return cudf::io::detail::skip_rows_num_rows_from_options(skip_rows, num_rows, get_num_rows()); }(); - std::vector selected_stripes_mapping; + struct stripe_source_mapping { + stripe_source_mapping(int source_idx, std::vector&& stripe_info) + : source_idx(source_idx), stripe_info(std::move(stripe_info)) + { + } + int source_idx; + std::vector stripe_info; + }; + + std::vector selected_stripes_mapping; if (!user_specified_stripes.empty()) { CUDF_EXPECTS(user_specified_stripes.size() == per_file_metadata.size(), @@ -176,7 +185,7 @@ aggregate_orc_metadata::select_stripes( // Each vector entry represents a source file; each nested vector represents the // user_defined_stripes to get from that source file for (size_t src_file_idx = 0; src_file_idx < user_specified_stripes.size(); ++src_file_idx) { - std::vector stripe_infos; + std::vector stripe_infos; // Coalesce stripe info at the source file later since that makes downstream processing much // easier in impl::read @@ -185,13 +194,15 @@ aggregate_orc_metadata::select_stripes( stripe_idx >= 0 and stripe_idx < static_cast( per_file_metadata[src_file_idx].ff.stripes.size()), "Invalid stripe index"); - stripe_infos.push_back( - std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + stripe_infos.push_back({&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], + nullptr, + static_cast(src_file_idx)}); // TODO: check for overflow here. 
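// NOTE (sketch only, not part of this patch): one possible form of the overflow check
// the TODO above asks for, assuming the row total must later fit in cudf::size_type
// (int32_t); the exact guard and message are illustrative:
//
//   auto const stripe_rows =
//     per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows;
//   CUDF_EXPECTS(rows_to_read + stripe_rows <=
//                  static_cast<uint64_t>(std::numeric_limits<cudf::size_type>::max()),
//                "Total number of rows to read exceeds the size_type limit");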
rows_to_read += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; } - selected_stripes_mapping.push_back({static_cast(src_file_idx), stripe_infos}); + selected_stripes_mapping.emplace_back(static_cast(src_file_idx), + std::move(stripe_infos)); } } else { uint64_t count = 0; @@ -200,33 +211,37 @@ aggregate_orc_metadata::select_stripes( for (size_t src_file_idx = 0; src_file_idx < per_file_metadata.size() && count < rows_to_skip + rows_to_read; ++src_file_idx) { - std::vector stripe_infos; + std::vector stripe_infos; for (size_t stripe_idx = 0; stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size() && count < rows_to_skip + rows_to_read; ++stripe_idx) { count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; if (count > rows_to_skip || count == 0) { - stripe_infos.push_back( - std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + stripe_infos.push_back({&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], + nullptr, + static_cast(src_file_idx)}); } else { stripe_skip_rows = count; } } - selected_stripes_mapping.push_back({static_cast(src_file_idx), stripe_infos}); + selected_stripes_mapping.emplace_back(static_cast(src_file_idx), + std::move(stripe_infos)); } // Need to remove skipped rows from the stripes which are not selected. rows_to_skip -= stripe_skip_rows; } + std::vector output; + // Read each stripe's stripefooter metadata for (auto& mapping : selected_stripes_mapping) { // Resize to all stripe_info for the source level per_file_metadata[mapping.source_idx].stripefooters.resize(mapping.stripe_info.size()); for (size_t i = 0; i < mapping.stripe_info.size(); i++) { - auto const stripe = mapping.stripe_info[i].first; + auto const stripe = mapping.stripe_info[i].stripe_info; auto const sf_comp_offset = stripe->offset + stripe->indexLength + stripe->dataLength; auto const sf_comp_length = stripe->footerLength; CUDF_EXPECTS( @@ -238,12 +253,15 @@ aggregate_orc_metadata::select_stripes( {buffer->data(), buffer->size()}, stream); ProtobufReader(sf_data.data(), sf_data.size()) .read(per_file_metadata[mapping.source_idx].stripefooters[i]); - mapping.stripe_info[i].second = &per_file_metadata[mapping.source_idx].stripefooters[i]; + mapping.stripe_info[i].stripe_footer = + &per_file_metadata[mapping.source_idx].stripefooters[i]; if (stripe->indexLength == 0) { row_grp_idx_present = false; } } + + output.insert(output.end(), mapping.stripe_info.begin(), mapping.stripe_info.end()); } - return {rows_to_skip, rows_to_read, selected_stripes_mapping}; + return {rows_to_skip, rows_to_read, std::move(output)}; } column_hierarchy aggregate_orc_metadata::select_columns( diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index f05946a4346..f6bba46b4c8 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -45,8 +45,6 @@ struct column_hierarchy { * to aggregate that metadata from all the files. */ class aggregate_orc_metadata { - using OrcStripeInfo = std::pair; - /** * @brief Sums up the number of rows of each source */ @@ -113,11 +111,11 @@ class aggregate_orc_metadata { * * Stripes are potentially selected from multiple files. 
*/ - [[nodiscard]] std::tuple> - select_stripes(std::vector> const& user_specified_stripes, - uint64_t skip_rows, - std::optional const& num_rows, - rmm::cuda_stream_view stream); + [[nodiscard]] std::tuple> select_stripes( + std::vector> const& user_specified_stripes, + uint64_t skip_rows, + std::optional const& num_rows, + rmm::cuda_stream_view stream); /** * @brief Filters ORC file to a selection of columns, based on their paths in the file. diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 4f3e0a82768..d17291d4acb 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -601,13 +601,13 @@ struct column_validity_info { * convenience methods for initializing and accessing metadata. */ class metadata { - using OrcStripeInfo = std::pair; - public: - struct stripe_source_mapping { + struct OrcStripeInfo { + StripeInformation const* stripe_info; + StripeFooter const* stripe_footer; int source_idx; - std::vector stripe_info; }; + std::vector stripe_info; public: explicit metadata(datasource* const src, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5590f53858c..e58e804d449 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -148,17 +148,12 @@ void reader::impl::query_stripe_compression_info() } for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stream_info = _file_itm_data->lvl_stream_info[level]; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto const num_columns = _selected_columns.levels[level].size(); + stream_info.reserve(selected_stripes.size() * num_columns); // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); + std::size_t total_num_stripes = selected_stripes.size(); // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; @@ -166,65 +161,61 @@ void reader::impl::query_stripe_compression_info() int stripe_idx = 0; std::vector, std::size_t>> read_tasks; - for (auto const& stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (auto const& stripe : stripe_source_mapping.stripe_info) { - auto const stripe_info = stripe.first; - auto const stripe_footer = stripe.second; - - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - level == 0, - stream_info); - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. 
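// NOTE (illustrative only): round_up_safe below pads each stripe buffer to a multiple of
// BUFFER_PADDING_MULTIPLE, presumably so copy_uncompressed_kernel can issue aligned,
// word-sized accesses without running off the end; e.g. with a hypothetical multiple of
// 8, round_up_safe(1001, 8) == 1008 bytes are allocated for a 1001-byte stripe.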
- stripe_data.emplace_back( - cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), _stream); - auto dst_base = static_cast(stripe_data.back().data()); - - // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; + for (auto const& stripe : selected_stripes) { + auto const stripe_info = stripe.stripe_info; + auto const stripe_footer = stripe.stripe_footer; + + auto stream_count = stream_info.size(); + auto const total_data_size = gather_stream_info(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + level == 0, + stream_info); + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + // Buffer needs to be padded. + // Required by `copy_uncompressed_kernel`. + stripe_data.emplace_back(cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), + _stream); + auto dst_base = static_cast(stripe_data.back().data()); + + // Coalesce consecutive streams into one read + while (not is_stripe_data_empty and stream_count < stream_info.size()) { + auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + auto const offset = stream_info[stream_count].offset; + auto len = stream_info[stream_count].length; + stream_count++; + + while (stream_count < stream_info.size() && + stream_info[stream_count].offset == offset + len) { + len += stream_info[stream_count].length; stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; - stream_count++; - } - if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, _stream), - len)); - - } else { - auto const buffer = - _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); - } } - - stripe_idx++; + if (_metadata.per_file_metadata[stripe.source_idx].source->is_device_read_preferred(len)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[stripe.source_idx].source->device_read_async( + offset, len, d_dst, _stream), + len)); + + } else { + auto const buffer = + _metadata.per_file_metadata[stripe.source_idx].source->host_read(offset, len); + CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY( + cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); + _stream.synchronize(); + } } + + stripe_idx++; } + for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 83954a7dd1b..f6728f22688 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -110,7 +110,7 @@ struct file_intermediate_data { int64_t 
rows_to_skip; size_type rows_to_read; - std::vector selected_stripes; + std::vector selected_stripes; }; } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 44b3a138c69..e8f39b2d870 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -751,7 +751,7 @@ void reader::impl::create_pass_data() // Logically view streams as columns std::vector stream_info; - stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); + // stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { @@ -803,10 +803,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, }); }); - return has_timestamp_column - ? cudf::detail::make_timezone_transition_table( - {}, selected_stripes[0].stripe_info[0].second->writerTimezone, _stream) - : std::make_unique(); + return has_timestamp_column ? cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) + : std::make_unique(); }(); auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; @@ -850,14 +849,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Get the total number of stripes across all input files. - std::size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](std::size_t sum, auto& stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); - auto const num_columns = columns_level.size(); + std::size_t total_num_stripes = selected_stripes.size(); + auto const num_columns = columns_level.size(); cudf::detail::hostdevice_2dvector chunks( total_num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); @@ -896,89 +889,85 @@ void reader::impl::prepare_data(uint64_t skip_rows, std::size_t stream_idx = 0; // std::vector, std::size_t>> read_tasks; - for (auto const& stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (auto const& stripe : stripe_source_mapping.stripe_info) { - auto const stripe_info = stripe.first; - auto const stripe_footer = stripe.second; - - auto const total_data_size = - gather_stream_info_and_update_chunks(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - &stream_idx, - chunks); - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - auto dst_base = static_cast(stripe_data[stripe_idx].data()); - - auto const num_rows_per_stripe = stripe_info->numberOfRows; - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; - if (use_index) { - stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / - _metadata.get_row_index_stride(); - } - // Update chunks to reference streams pointers - for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx][col_idx]; - // start row, number of rows in a each stripe and total number of rows - // may change in lower levels of nesting - chunk.start_row = (level == 0) - ? 
stripe_start_row - : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; - chunk.num_rows = - (level == 0) ? stripe_info->numberOfRows - : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; - chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; - chunk.parent_validity_info = - (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; - chunk.parent_null_count_prefix_sums = - (level == 0) - ? nullptr - : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); - chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; - chunk.type_kind = _metadata.per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[columns_level[col_idx].id] - .kind; - // num_child_rows for a struct column will be same, for other nested types it will be - // calculated. - chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; - chunk.dtype_id = column_types[col_idx].id(); - chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[columns_level[col_idx].id] - .scale.value_or(0); - - chunk.rowgroup_id = rowgroup_id; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? sizeof(string_index_pair) - : ((column_types[col_idx].id() == type_id::LIST) or - (column_types[col_idx].id() == type_id::STRUCT)) - ? sizeof(size_type) - : cudf::size_of(column_types[col_idx]); - chunk.num_rowgroups = stripe_num_rowgroups; - if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _timestamp_type.id(); } - if (not is_stripe_data_empty) { - for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; - } + for (auto const& stripe : selected_stripes) { + auto const stripe_info = stripe.stripe_info; + auto const stripe_footer = stripe.stripe_footer; + + auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + &stream_idx, + chunks); + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + auto dst_base = static_cast(stripe_data[stripe_idx].data()); + + auto const num_rows_per_stripe = stripe_info->numberOfRows; + auto const rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; + if (use_index) { + stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + _metadata.get_row_index_stride(); + } + // Update chunks to reference streams pointers + for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { + auto& chunk = chunks[stripe_idx][col_idx]; + // start row, number of rows in a each stripe and total number of rows + // may change in lower levels of nesting + chunk.start_row = (level == 0) + ? stripe_start_row + : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; + chunk.num_rows = (level == 0) + ? stripe_info->numberOfRows + : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; + chunk.parent_validity_info = + (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; + chunk.parent_null_count_prefix_sums = + (level == 0) + ? 
nullptr + : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); + chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; + chunk.type_kind = + _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; + // num_child_rows for a struct column will be same, for other nested types it will be + // calculated. + chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; + chunk.dtype_id = column_types[col_idx].id(); + chunk.decimal_scale = _metadata.per_file_metadata[stripe.source_idx] + .ff.types[columns_level[col_idx].id] + .scale.value_or(0); + + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) + ? sizeof(size_type) + : cudf::size_of(column_types[col_idx]); + chunk.num_rowgroups = stripe_num_rowgroups; + if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _timestamp_type.id(); } + if (not is_stripe_data_empty) { + for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; } } - stripe_start_row += num_rows_per_stripe; - num_rowgroups += stripe_num_rowgroups; - - stripe_idx++; } + stripe_start_row += num_rows_per_stripe; + num_rowgroups += stripe_num_rowgroups; + + stripe_idx++; } + // for (auto& task : read_tasks) { // CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); // } From 90ac38065138dca73dfe9fceecf39a8bfd2a6387 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 20:53:09 -0800 Subject: [PATCH 029/321] Store data chunk descriptors Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- cpp/src/io/orc/reader_impl_preprocess.cu | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index f6728f22688..f4f23a9382a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -105,7 +105,7 @@ struct file_intermediate_data { std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; - + std::vector> lvl_data_chunks; std::vector> lvl_stream_info; int64_t rows_to_skip; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index e8f39b2d870..62f5c6be049 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -810,7 +810,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; + auto& lvl_chunks = _file_itm_data->lvl_data_chunks; lvl_stripe_data.resize(_selected_columns.num_levels()); + lvl_chunks.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); @@ -851,8 +853,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, // Get the total number of stripes across all input files. 
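// NOTE (sketch only, not part of this patch): the usual round trip for the
// hostdevice_2dvector used just below, with the same calls this file relies on
// (chunks is indexed as chunks[stripe_idx][col_idx]):
//
//   cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> chunks(num_stripes, num_cols, stream);
//   memset(chunks.base_host_ptr(), 0, chunks.size_bytes());  // zero-init on the host
//   // ... fill chunks[stripe][col] on the host ...
//   chunks.host_to_device_async(stream);                     // then mirror to the device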
std::size_t total_num_stripes = selected_stripes.size(); auto const num_columns = columns_level.size(); - cudf::detail::hostdevice_2dvector chunks( - total_num_stripes, num_columns, _stream); + auto& chunks = lvl_chunks[level]; + chunks = + cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = From c8eeaccb29bc6f1abe65f8c1307a467f35d1b992 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 21:04:10 -0800 Subject: [PATCH 030/321] Create read_info vector Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 7 ++++++- cpp/src/io/orc/reader_impl_chunking.hpp | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index e58e804d449..249a24fdd3c 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -123,7 +123,9 @@ void reader::impl::query_stripe_compression_info() if (rows_to_read == 0 || selected_stripes.empty()) { return; } auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_read_info = _file_itm_data->lvl_read_info; lvl_stripe_data.resize(_selected_columns.num_levels()); + lvl_read_info.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -150,7 +152,9 @@ void reader::impl::query_stripe_compression_info() for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); - stream_info.reserve(selected_stripes.size() * num_columns); + auto& read_info = lvl_read_info[level]; + stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown // Get the total number of stripes across all input files. 
std::size_t total_num_stripes = selected_stripes.size(); @@ -197,6 +201,7 @@ void reader::impl::query_stripe_compression_info() len += stream_info[stream_count].length; stream_count++; } + read_info.emplace_back(offset, len, d_dst); if (_metadata.per_file_metadata[stripe.source_idx].source->is_device_read_preferred(len)) { read_tasks.push_back( std::pair(_metadata.per_file_metadata[stripe.source_idx].source->device_read_async( diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index f4f23a9382a..f3e0b421843 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -108,6 +108,17 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; + struct read_info { + read_info(uint64_t offset_, std::size_t length_, uint8_t* dst_pos_) + : offset(offset_), length(length_), dst_pos(dst_pos_) + { + } + uint64_t offset; + std::size_t length; + uint8_t* dst_pos; + }; + std::vector> lvl_read_info; + int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; From 9a8949d48b5b9bcd9aa4ccc2780c4a03a109140c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 21:30:38 -0800 Subject: [PATCH 031/321] Store stripe sizes Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 22 ++++++++++++---------- cpp/src/io/orc/reader_impl_chunking.hpp | 1 + 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 249a24fdd3c..d866c9ea6d4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -122,9 +122,11 @@ void reader::impl::query_stripe_compression_info() // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_read_info = _file_itm_data->lvl_read_info; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; + auto& lvl_read_info = _file_itm_data->lvl_read_info; lvl_stripe_data.resize(_selected_columns.num_levels()); + lvl_stripe_sizes.resize(_selected_columns.num_levels()); lvl_read_info.resize(_selected_columns.num_levels()); // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. @@ -153,19 +155,20 @@ void reader::impl::query_stripe_compression_info() auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); auto& read_info = lvl_read_info[level]; - stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown - read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + auto& stripe_sizes = lvl_stripe_sizes[level]; + stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + stripe_sizes.reserve(selected_stripes.size() * num_columns); // final size is unknown // Get the total number of stripes across all input files. 
- std::size_t total_num_stripes = selected_stripes.size(); + std::size_t num_stripes = selected_stripes.size(); // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; - int stripe_idx = 0; - std::vector, std::size_t>> read_tasks; - for (auto const& stripe : selected_stripes) { + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; stripe_idx++) { + auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; @@ -178,6 +181,7 @@ void reader::impl::query_stripe_compression_info() _metadata.get_types(), level == 0, stream_info); + stripe_sizes.push_back(total_data_size); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -217,8 +221,6 @@ void reader::impl::query_stripe_compression_info() _stream.synchronize(); } } - - stripe_idx++; } for (auto& task : read_tasks) { diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index f3e0b421843..1b5ee86d107 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -103,6 +103,7 @@ struct file_intermediate_data { compinfo_map; bool compinfo_ready{false}; + std::vector> lvl_stripe_sizes; std::vector> lvl_stripe_data; std::vector>> null_count_prefix_sums; std::vector> lvl_data_chunks; From 06ee057f0088d36feaaf997b356bdee658ba1c73 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 22:52:16 -0800 Subject: [PATCH 032/321] Read separately from parse stripe sizes Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 130 +++++++++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 22 +++- 2 files changed, 99 insertions(+), 53 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d866c9ea6d4..fabe00dadf9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -124,10 +124,10 @@ void reader::impl::query_stripe_compression_info() auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; - auto& lvl_read_info = _file_itm_data->lvl_read_info; lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - lvl_read_info.resize(_selected_columns.num_levels()); + + auto& read_info = _file_itm_data->read_info; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -151,51 +151,46 @@ void reader::impl::query_stripe_compression_info() } } + // Get the total number of stripes across all input files. + std::size_t num_stripes = selected_stripes.size(); + + // Compute input size for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + lvl_stripe_data[level].resize(num_stripes); + auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); - auto& read_info = lvl_read_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; - stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown - read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown - stripe_sizes.reserve(selected_stripes.size() * num_columns); // final size is unknown - - // Get the total number of stripes across all input files. 
- std::size_t num_stripes = selected_stripes.size(); + stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; + stripe_sizes.resize(selected_stripes.size()); + if (read_info.capacity() < selected_stripes.size()) { + read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + } - std::vector, std::size_t>> read_tasks; - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; stripe_idx++) { + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - auto stream_count = stream_info.size(); - auto const total_data_size = gather_stream_info(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - level == 0, - stream_info); - stripe_sizes.push_back(total_data_size); - - auto const is_stripe_data_empty = total_data_size == 0; + auto stream_count = stream_info.size(); + auto const stripe_size = gather_stream_info(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + level == 0, + stream_info); + stripe_sizes[stripe_idx] = stripe_size; + + auto const is_stripe_data_empty = stripe_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - // Buffer needs to be padded. - // Required by `copy_uncompressed_kernel`. - stripe_data.emplace_back(cudf::util::round_up_safe(total_data_size, BUFFER_PADDING_MULTIPLE), - _stream); - auto dst_base = static_cast(stripe_data.back().data()); - // Coalesce consecutive streams into one read while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = dst_base + stream_info[stream_count].dst_pos; + auto const d_dst = stream_info[stream_count].dst_pos; auto const offset = stream_info[stream_count].offset; auto len = stream_info[stream_count].length; stream_count++; @@ -205,28 +200,65 @@ void reader::impl::query_stripe_compression_info() len += stream_info[stream_count].length; stream_count++; } - read_info.emplace_back(offset, len, d_dst); - if (_metadata.per_file_metadata[stripe.source_idx].source->is_device_read_preferred(len)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[stripe.source_idx].source->device_read_async( - offset, len, d_dst, _stream), - len)); - - } else { - auto const buffer = - _metadata.per_file_metadata[stripe.source_idx].source->host_read(offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); - } + read_info.emplace_back(offset, len, d_dst, stripe.source_idx, stripe_idx, level); } } + } - for (auto& task : read_tasks) { - CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); + // Prepare the buffer to read raw data onto. 
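// NOTE (summary, not part of this patch): the restructuring here yields a three-phase
// I/O plan, using the surrounding names:
//   1. sizing (above): fill stripe_sizes and the coalesced read_info list;
//   2. allocation (below): one padded rmm::device_buffer per stripe and level;
//   3. reading (further below): issue every read_info entry into its stripe buffer.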
+ for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stripe_data = lvl_stripe_data[level]; + auto& stripe_sizes = lvl_stripe_sizes[level]; + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + stripe_data[stripe_idx] = rmm::device_buffer( + cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); } + } + + std::vector, std::size_t>> read_tasks; + // Should not read all, but read stripe by stripe. + for (auto const& read : read_info) { + auto& stripe_data = lvl_stripe_data[read.level]; + auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); + if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( + read.length)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( + read.offset, read.length, dst_base + read.dst_pos, _stream), + read.length)); + + } else { + read_tasks.push_back( + std::pair(std::async(std::launch::async, + [&, read = read, dst_base = dst_base] { + auto const buffer = + _metadata.per_file_metadata[read.source_idx].source->host_read( + read.offset, read.length); + CUDF_EXPECTS(buffer->size() == read.length, + "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read.dst_pos, + buffer->data(), + read.length, + cudaMemcpyDefault, + _stream.value())); + _stream.synchronize(); + return read.length; + }), + read.length)); + } + } + for (auto& task : read_tasks) { + CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); + } + + // Parse the decompressed sizes for each stripe. + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto const num_columns = _selected_columns.levels[level].size(); + + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } // Setup row group descriptors if using indexes diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1b5ee86d107..9797d113e9a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -109,16 +109,30 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; + // Each read correspond to one or more consecutive stream combined. 
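// NOTE (hypothetical example, not part of this patch): how the coalescing described
// above collapses back-to-back streams into a single entry of the struct below:
//
//   stream A: offset 1000, length 200
//   stream B: offset 1200, length 300   // 1200 == 1000 + 200, so it extends the read
//   stream C: offset 1500, length 100   // still contiguous
//   => read_info{offset = 1000, length = 600, dst_pos = 0,
//                source_idx = 0, stripe_idx = 0, level = 0}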
struct read_info { - read_info(uint64_t offset_, std::size_t length_, uint8_t* dst_pos_) - : offset(offset_), length(length_), dst_pos(dst_pos_) + read_info(uint64_t offset_, + std::size_t length_, + std::size_t dst_pos_, + std::size_t source_idx_, + std::size_t stripe_idx_, + std::size_t level_) + : offset(offset_), + length(length_), + dst_pos(dst_pos_), + source_idx(source_idx_), + stripe_idx(stripe_idx_), + level(level_) { } uint64_t offset; std::size_t length; - uint8_t* dst_pos; + std::size_t dst_pos; + std::size_t source_idx; + std::size_t stripe_idx; + std::size_t level; }; - std::vector> lvl_read_info; + std::vector read_info; int64_t rows_to_skip; size_type rows_to_read; From 746a2ef05fae3e0db57d641451195abed1922417 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 23:10:31 -0800 Subject: [PATCH 033/321] Fix copyright year Signed-off-by: Nghia Truong --- cpp/src/io/orc/orc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index d17291d4acb..a3fdef78a37 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 3f2a36efc2087fae99a686594b4eef1f2ef6bc21 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 23:11:05 -0800 Subject: [PATCH 034/321] Compute input size stripe by stripe Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index fabe00dadf9..40b4ee7fc63 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -136,6 +136,9 @@ void reader::impl::query_stripe_compression_info() // Logically view streams as columns _file_itm_data->lvl_stream_info.resize(_selected_columns.num_levels()); + // Get the total number of stripes across all input files. + std::size_t num_stripes = selected_stripes.size(); + // Iterates through levels of nested columns, child column will be one level down // compared to parent column. auto& col_meta = *_col_meta; @@ -149,13 +152,7 @@ void reader::impl::query_stripe_compression_info() // Map each ORC column to its column col_meta.orc_col_map[level][col.id] = col_id++; } - } - - // Get the total number of stripes across all input files. - std::size_t num_stripes = selected_stripes.size(); - // Compute input size for each stripe. - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { lvl_stripe_data[level].resize(num_stripes); auto& stream_info = _file_itm_data->lvl_stream_info[level]; @@ -167,11 +164,17 @@ void reader::impl::query_stripe_compression_info() if (read_info.capacity() < selected_stripes.size()) { read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown } + } - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { - auto const& stripe = selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - auto const stripe_footer = stripe.stripe_footer; + // Compute input size for each stripe. 
+ for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + auto const& stripe = selected_stripes[stripe_idx]; + auto const stripe_info = stripe.stripe_info; + auto const stripe_footer = stripe.stripe_footer; + + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto& stripe_sizes = lvl_stripe_sizes[level]; auto stream_count = stream_info.size(); auto const stripe_size = gather_stream_info(stripe_idx, @@ -217,6 +220,7 @@ void reader::impl::query_stripe_compression_info() std::vector, std::size_t>> read_tasks; // Should not read all, but read stripe by stripe. + // read_info should be limited by stripe. for (auto const& read : read_info) { auto& stripe_data = lvl_stripe_data[read.level]; auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); From 5c2c5ea22b5859a18fbc076b1df9dc5c9885a3d0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 23:19:27 -0800 Subject: [PATCH 035/321] Read without async Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 40b4ee7fc63..0a41c735d33 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -233,6 +233,15 @@ void reader::impl::query_stripe_compression_info() read.length)); } else { + auto const buffer = + _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); + CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY(cudaMemcpyAsync( + dst_base + read.dst_pos, buffer->data(), read.length, cudaMemcpyDefault, _stream.value())); + _stream.synchronize(); + +#if 0 + // This in theory should be faster, but in practice it's slower. Why? 
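// NOTE (one plausible, unverified answer to the question above): every task below ends
// with _stream.synchronize() and all copies target the same CUDA stream, so the spawned
// threads largely serialize on that stream; host_read also returns pageable memory, for
// which cudaMemcpyAsync behaves synchronously. A sketch that keeps the reads but defers
// to a single synchronize once all copies are enqueued:
//
//   for (auto const& read : read_info) {
//     // host_read(...) + cudaMemcpyAsync(...) as below, without the per-read sync
//   }
//   _stream.synchronize();  // one synchronization for the whole batch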
read_tasks.push_back( std::pair(std::async(std::launch::async, [&, read = read, dst_base = dst_base] { @@ -250,6 +259,7 @@ void reader::impl::query_stripe_compression_info() return read.length; }), read.length)); +#endif } } for (auto& task : read_tasks) { From 0a1db0924a6c705d18dff57d7c780b8d2869cb8c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 27 Jan 2024 23:45:34 -0800 Subject: [PATCH 036/321] Find stripe split Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 83 ++++++++++++++++++++++++- cpp/src/io/orc/reader_impl_chunking.hpp | 7 +++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 0a41c735d33..a0e79959051 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -107,6 +107,63 @@ std::size_t gather_stream_info(std::size_t stripe_index, return dst_offset; } +struct cumulative_size { + std::size_t count; + std::size_t size_bytes; +}; + +struct cumulative_size_sum { + __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const + { + return cumulative_size{a.count + b.count, a.size_bytes + b.size_bytes}; + } +}; + +#if 0 +std::vector find_splits(host_span sizes, + size_type num_rows, + size_t size_limit) +{ + std::vector splits; + + uint32_t cur_count = 0; + int64_t cur_pos = 0; + size_t cur_cumulative_size = 0; + auto const start = thrust::make_transform_iterator( + sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); + auto const end = start + static_cast(sizes.size()); + while (cur_count < static_cast(num_rows)) { + int64_t split_pos = + thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit)); + + // If we're past the end, or if the returned bucket is bigger than the chunk_read_limit, move + // back one. + if (static_cast(split_pos) >= sizes.size() || + (sizes[split_pos].size_bytes - cur_cumulative_size > size_limit)) { + split_pos--; + } + + // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in + // a loop because all of the cumulative sizes for all the pages are sorted into one big list. + // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in + // the list twice. so we have to iterate until we skip past all of them. The idea is that we + // either do this, or we have to call unique() on the input first. + while (split_pos < (static_cast(sizes.size()) - 1) && + (split_pos < 0 || sizes[split_pos].count == cur_count)) { + split_pos++; + } + + auto const start_row = cur_count; + cur_count = sizes[split_pos].count; + splits.emplace_back(chunk{start_row, static_cast(cur_count - start_row)}); + cur_pos = split_pos; + cur_cumulative_size = sizes[split_pos].size_bytes; + } + + return splits; +} +#endif + } // namespace void reader::impl::query_stripe_compression_info() @@ -114,7 +171,6 @@ void reader::impl::query_stripe_compression_info() if (_file_itm_data->compinfo_ready) { return; } if (_selected_columns.num_levels() == 0) { return; } - auto const rows_to_skip = _file_itm_data->rows_to_skip; auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; @@ -166,12 +222,15 @@ void reader::impl::query_stripe_compression_info() } } + cudf::detail::hostdevice_vector total_stripe_sizes(num_stripes, _stream); + // Compute input size for each stripe. 
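// NOTE (worked toy example, not part of this patch): running find_splits() as defined
// above on four stripes of {10, 20, 30, 40} bytes with size_limit = 50. After the
// inclusive scan further down, the cumulative sizes are:
//
//   count:      1   2   3    4
//   size_bytes: 10  30  60  100
//
// lower_bound first lands on 60 (> 50 relative to base 0), so the split steps back one
// and emits chunk{0, 2}; relative to the new base of 30 it then emits chunk{2, 1}
// (30 bytes) and finally chunk{3, 1} (40 bytes), each kept under the limit.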
for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; + std::size_t total_stripe_size{0}; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; @@ -186,6 +245,7 @@ void reader::impl::query_stripe_compression_info() level == 0, stream_info); stripe_sizes[stripe_idx] = stripe_size; + total_stripe_size += stripe_size; auto const is_stripe_data_empty = stripe_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -206,8 +266,26 @@ void reader::impl::query_stripe_compression_info() read_info.emplace_back(offset, len, d_dst, stripe.source_idx, stripe_idx, level); } } + total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; } + // Compute the prefix sum of stripe data sizes. + total_stripe_sizes.host_to_device_async(_stream); + thrust::inclusive_scan(rmm::exec_policy(_stream), + total_stripe_sizes.d_begin(), + total_stripe_sizes.d_end(), + total_stripe_sizes.d_begin(), + cumulative_size_sum{}); + + total_stripe_sizes.device_to_host_sync(_stream); + + // fix this: + // _file_itm_data->stripe_chunks = + // find_splits(total_stripe_sizes, _file_itm_data->rows_to_read, /*chunk_size_limit*/ 0); + + // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; + // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); + // Prepare the buffer to read raw data onto. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stripe_data = lvl_stripe_data[level]; @@ -221,6 +299,9 @@ void reader::impl::query_stripe_compression_info() std::vector, std::size_t>> read_tasks; // Should not read all, but read stripe by stripe. // read_info should be limited by stripe. + // Read level-by-level. + // TODO: Test with read and parse/decode column by column. + // This is future work. for (auto const& read : read_info) { auto& stripe_data = lvl_stripe_data[read.level]; auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 9797d113e9a..bacdbc7933f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -95,6 +95,11 @@ struct stream_id_hash { } }; +struct chunk { + int64_t start_idx; + int64_t count; +}; + /** * @brief Struct to store file-level data that remains constant for all chunks being read. */ @@ -109,6 +114,8 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; + std::vector stripe_chunks; + // Each read correspond to one or more consecutive stream combined. 
struct read_info { read_info(uint64_t offset_, From 7c9867440430306273d37627fdcd09d3066359ec Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 08:15:11 -0800 Subject: [PATCH 037/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index a0e79959051..01d0e12395b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -119,9 +119,9 @@ struct cumulative_size_sum { } }; -#if 0 +#if 1 std::vector find_splits(host_span sizes, - size_type num_rows, + size_type total_count, size_t size_limit) { std::vector splits; @@ -132,7 +132,7 @@ std::vector find_splits(host_span sizes, auto const start = thrust::make_transform_iterator( sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); auto const end = start + static_cast(sizes.size()); - while (cur_count < static_cast(num_rows)) { + while (cur_count < static_cast(total_count)) { int64_t split_pos = thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit)); @@ -153,9 +153,9 @@ std::vector find_splits(host_span sizes, split_pos++; } - auto const start_row = cur_count; + auto const start_idx = cur_count; cur_count = sizes[split_pos].count; - splits.emplace_back(chunk{start_row, static_cast(cur_count - start_row)}); + splits.emplace_back(chunk{start_idx, static_cast(cur_count - start_idx)}); cur_pos = split_pos; cur_cumulative_size = sizes[split_pos].size_bytes; } @@ -279,9 +279,17 @@ void reader::impl::query_stripe_compression_info() total_stripe_sizes.device_to_host_sync(_stream); - // fix this: - // _file_itm_data->stripe_chunks = - // find_splits(total_stripe_sizes, _file_itm_data->rows_to_read, /*chunk_size_limit*/ 0); + _file_itm_data->stripe_chunks = find_splits( + total_stripe_sizes, + total_stripe_sizes.size(), + /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); + + auto& splits = _file_itm_data->stripe_chunks; + printf("------------\nSplits (/%d): \n", (int)num_stripes); + for (size_t idx = 0; idx < splits.size(); idx++) { + printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + } + fflush(stdout); // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); From 3b9aabb0564a5564485056326036399ce2204343 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 08:20:24 -0800 Subject: [PATCH 038/321] Rename variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 6 +++--- cpp/src/io/orc/reader_impl_chunking.hpp | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 01d0e12395b..4cd807fff2e 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -183,7 +183,7 @@ void reader::impl::query_stripe_compression_info() lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - auto& read_info = _file_itm_data->read_info; + auto& read_info = _file_itm_data->stream_read_info; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
std::unordered_map @@ -279,12 +279,12 @@ void reader::impl::query_stripe_compression_info() total_stripe_sizes.device_to_host_sync(_stream); - _file_itm_data->stripe_chunks = find_splits( + _file_itm_data->load_stripe_chunks = find_splits( total_stripe_sizes, total_stripe_sizes.size(), /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); - auto& splits = _file_itm_data->stripe_chunks; + auto& splits = _file_itm_data->load_stripe_chunks; printf("------------\nSplits (/%d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index bacdbc7933f..fae4e2c69ca 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -114,11 +114,15 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; - std::vector stripe_chunks; + std::vector load_stripe_chunks; + std::size_t curr_load_stripe_chunk{0}; + + std::vector decode_stripe_chunks; + std::size_t curr_decode_stripe_chunk{0}; // Each read correspond to one or more consecutive stream combined. - struct read_info { - read_info(uint64_t offset_, + struct stream_read_info { + stream_read_info(uint64_t offset_, std::size_t length_, std::size_t dst_pos_, std::size_t source_idx_, @@ -139,7 +143,7 @@ struct file_intermediate_data { std::size_t stripe_idx; std::size_t level; }; - std::vector read_info; + std::vector stream_read_info; int64_t rows_to_skip; size_type rows_to_read; From ea1c94f3e46f955f7d09434dfcb6915033972821 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 08:30:15 -0800 Subject: [PATCH 039/321] Add function interface Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 10 ++++---- cpp/src/io/orc/reader_impl_preprocess.cu | 29 ++++++------------------ 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 8130ac51f6d..8cf13a13b4d 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -77,10 +77,12 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); - /** - * - */ - void create_pass_data(); + // Do once for the entire file. + void global_preprocess(); + + void pass_preprocess(); + + void subpass_preprocess(); /** * @brief Compute stripe sizes. 
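For reference, the stripe-splitting logic that the "Fix bug" patch above iterates on boils down to a greedy pass over inclusive prefix sums. The following is a minimal host-side sketch, illustrative only: find_splits_cpu is a hypothetical name (the real version runs thrust::lower_bound on the device), while cumulative_size and chunk mirror the structs in reader_impl_chunking.

#include <cstddef>
#include <cstdint>
#include <vector>

struct cumulative_size { int64_t count; std::size_t size_bytes; };  // inclusive prefix sums
struct chunk { int64_t start_idx; int64_t count; };

// Cut chunks greedily so each stays within size_limit, always taking at least
// one stripe per chunk (so a single oversized stripe forms its own chunk).
std::vector<chunk> find_splits_cpu(std::vector<cumulative_size> const& sizes,
                                   int64_t total_count,
                                   std::size_t size_limit)
{
  std::vector<chunk> splits;
  int64_t cur_count          = 0;  // stripes consumed so far
  std::size_t cur_cumulative = 0;  // bytes consumed so far
  std::size_t pos            = 0;  // first stripe of the current chunk
  while (cur_count < total_count) {
    auto end = pos;  // take at least one stripe
    while (end + 1 < sizes.size() &&
           sizes[end + 1].size_bytes - cur_cumulative <= size_limit) {
      ++end;
    }
    splits.push_back(chunk{cur_count, sizes[end].count - cur_count});
    cur_count      = sizes[end].count;
    cur_cumulative = sizes[end].size_bytes;
    pos            = end + 1;
  }
  return splits;
}

For example, stripes of 10, 15 and 35 bytes give prefix sums {10, 25, 60}; with size_limit = 30 this produces chunks {0, 2} and {2, 1}.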
diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 62f5c6be049..eb4652d5498 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -742,30 +742,11 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace -void reader::impl::create_pass_data() -{ - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - lvl_stripe_data.resize(_selected_columns.num_levels()); - - auto const& selected_stripes = _file_itm_data->selected_stripes; +void reader::impl::global_preprocess() {} - // Logically view streams as columns - std::vector stream_info; - // stream_info.reserve(selected_stripes.size() * selected_stripes.front().stripe_info.size()); +void reader::impl::pass_preprocess() {} - auto& col_meta = *_col_meta; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& columns_level = _selected_columns.levels[level]; - // Association between each ORC column and its cudf::column - col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); - - size_type col_id{0}; - for (auto& col : columns_level) { - // Map each ORC column to its column - col_meta.orc_col_map[level][col.id] = col_id++; - } - } -} +void reader::impl::subpass_preprocess() {} void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, @@ -779,6 +760,10 @@ void reader::impl::prepare_data(uint64_t skip_rows, // There are no columns in the table if (_selected_columns.num_levels() == 0) { return; } + global_preprocess(); + pass_preprocess(); + subpass_preprocess(); + _file_itm_data = std::make_unique(); // Select only stripes required (aka row groups) From a4776fa25907f24692f60310dd0ef61665fd64d6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 08:47:12 -0800 Subject: [PATCH 040/321] Add more interface Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 4 +- cpp/src/io/orc/reader_impl_chunking.cu | 4 +- cpp/src/io/orc/reader_impl_chunking.hpp | 16 +++++--- cpp/src/io/orc/reader_impl_preprocess.cu | 48 ++++++++++++++++++------ 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 8cf13a13b4d..c439c44870a 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -78,7 +78,9 @@ class reader::impl { std::vector> const& stripes); // Do once for the entire file. 
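 // (Intended division of labor, judging from later patches in this series:
 // global_preprocess selects stripes and plans chunks once per file,
 // pass_preprocess reads one chunk of stripes from the source, and
 // subpass_preprocess parses that chunk's compression info before decoding.)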
- void global_preprocess(); + void global_preprocess(uint64_t skip_rows, + std::optional const& num_rows_opt, + std::vector> const& stripes); void pass_preprocess(); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 4cd807fff2e..99511f2b320 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -168,7 +168,7 @@ std::vector find_splits(host_span sizes, void reader::impl::query_stripe_compression_info() { - if (_file_itm_data->compinfo_ready) { return; } + // if (_file_itm_data->compinfo_ready) { return; } if (_selected_columns.num_levels() == 0) { return; } auto const rows_to_read = _file_itm_data->rows_to_read; @@ -432,7 +432,7 @@ void reader::impl::query_stripe_compression_info() } // end loop level // lvl_stripe_data.clear(); - _file_itm_data->compinfo_ready = true; + // _file_itm_data->compinfo_ready = true; } } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index fae4e2c69ca..83a61703f20 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -106,7 +106,7 @@ struct chunk { struct file_intermediate_data { std::unordered_map compinfo_map; - bool compinfo_ready{false}; + // bool compinfo_ready{false}; std::vector> lvl_stripe_sizes; std::vector> lvl_stripe_data; @@ -123,11 +123,11 @@ struct file_intermediate_data { // Each read correspond to one or more consecutive stream combined. struct stream_read_info { stream_read_info(uint64_t offset_, - std::size_t length_, - std::size_t dst_pos_, - std::size_t source_idx_, - std::size_t stripe_idx_, - std::size_t level_) + std::size_t length_, + std::size_t dst_pos_, + std::size_t source_idx_, + std::size_t stripe_idx_, + std::size_t level_) : offset(offset_), length(length_), dst_pos(dst_pos_), @@ -148,6 +148,10 @@ struct file_intermediate_data { int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; + + bool global_preprocessed{false}; + bool pass_preprocessed{false}; + bool subpass_preprocessed{false}; }; } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index eb4652d5498..1f092243b74 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -742,11 +742,43 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace -void reader::impl::global_preprocess() {} +void reader::impl::global_preprocess(uint64_t skip_rows, + std::optional const& num_rows_opt, + std::vector> const& stripes) +{ + if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } + if (_file_itm_data->global_preprocessed) { return; } -void reader::impl::pass_preprocess() {} + // TODO: move this to end of func. 
+ _file_itm_data->global_preprocessed = true; -void reader::impl::subpass_preprocess() {} + // Select only stripes required (aka row groups) + std::tie( + _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = + _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); + auto const rows_to_skip = _file_itm_data->rows_to_skip; + auto const rows_to_read = _file_itm_data->rows_to_read; + auto const& selected_stripes = _file_itm_data->selected_stripes; + + // If no rows or stripes to read, return empty columns + if (rows_to_read == 0 || selected_stripes.empty()) { return; } + + query_stripe_compression_info(); +} + +void reader::impl::pass_preprocess() +{ + if (_file_itm_data->pass_preprocessed) { return; } + + _file_itm_data->pass_preprocessed = true; +} + +void reader::impl::subpass_preprocess() +{ + if (_file_itm_data->subpass_preprocessed) { return; } + + _file_itm_data->subpass_preprocessed = true; +} void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, @@ -760,16 +792,10 @@ void reader::impl::prepare_data(uint64_t skip_rows, // There are no columns in the table if (_selected_columns.num_levels() == 0) { return; } - global_preprocess(); + global_preprocess(skip_rows, num_rows_opt, stripes); pass_preprocess(); subpass_preprocess(); - _file_itm_data = std::make_unique(); - - // Select only stripes required (aka row groups) - std::tie( - _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = - _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); auto const rows_to_skip = _file_itm_data->rows_to_skip; auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; @@ -777,8 +803,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, // If no rows or stripes to read, return empty columns if (rows_to_read == 0 || selected_stripes.empty()) { return; } - query_stripe_compression_info(); - // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { auto const has_timestamp_column = std::any_of( From 7da761e7f3729f29ae15e18a0fde57af3447f588 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 09:20:54 -0800 Subject: [PATCH 041/321] Separate preprocessing into global, pass and subpass steps Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 5 --- cpp/src/io/orc/reader_impl_chunking.cu | 56 ++++++++++++++++++++++-- cpp/src/io/orc/reader_impl_preprocess.cu | 38 ---------------- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index c439c44870a..1b5bef347d0 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -86,11 +86,6 @@ class reader::impl { void subpass_preprocess(); - /** - * @brief Compute stripe sizes. - */ - void query_stripe_compression_info(); - /** * @brief Create the output table metadata from file metadata. 
* diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 99511f2b320..ea763de3d4f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -166,16 +166,25 @@ std::vector find_splits(host_span sizes, } // namespace -void reader::impl::query_stripe_compression_info() +void reader::impl::global_preprocess(uint64_t skip_rows, + std::optional const& num_rows_opt, + std::vector> const& stripes) { - // if (_file_itm_data->compinfo_ready) { return; } - if (_selected_columns.num_levels() == 0) { return; } + if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } + if (_file_itm_data->global_preprocessed) { return; } + // TODO: move this to end of func. + _file_itm_data->global_preprocessed = true; + + // Select only stripes required (aka row groups) + std::tie( + _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = + _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); + auto const rows_to_skip = _file_itm_data->rows_to_skip; auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; // If no rows or stripes to read, return empty columns - // TODO : remove? if (rows_to_read == 0 || selected_stripes.empty()) { return; } auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; @@ -303,6 +312,32 @@ void reader::impl::query_stripe_compression_info() cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); } } +} + +void reader::impl::pass_preprocess() +{ + if (_file_itm_data->pass_preprocessed) { return; } + _file_itm_data->pass_preprocessed = true; + + auto const rows_to_read = _file_itm_data->rows_to_read; + auto const& selected_stripes = _file_itm_data->selected_stripes; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; + auto& read_info = _file_itm_data->stream_read_info; + + std::size_t num_stripes = selected_stripes.size(); + + // TODO: this is a pass + + // Prepare the buffer to read raw data onto. + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& stripe_data = lvl_stripe_data[level]; + auto& stripe_sizes = lvl_stripe_sizes[level]; + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + stripe_data[stripe_idx] = rmm::device_buffer( + cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); + } + } std::vector, std::size_t>> read_tasks; // Should not read all, but read stripe by stripe. @@ -354,6 +389,19 @@ void reader::impl::query_stripe_compression_info() for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } +} + +void reader::impl::subpass_preprocess() +{ + if (_file_itm_data->subpass_preprocessed) { return; } + _file_itm_data->subpass_preprocessed = true; + + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + + // TODO: This is subpass + // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. + std::unordered_map + stream_compinfo_map; // Parse the decompressed sizes for each stripe. 
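 // (For compressed inputs, gpu::ParseCompressedStripeData scans each stream's
 // block headers to fill in the compressed/uncompressed block counts and the
 // maximum uncompressed size, which are then cached in compinfo_map.)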
for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 1f092243b74..0a99e831f8f 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -742,44 +742,6 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace -void reader::impl::global_preprocess(uint64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes) -{ - if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } - if (_file_itm_data->global_preprocessed) { return; } - - // TODO: move this to end of func. - _file_itm_data->global_preprocessed = true; - - // Select only stripes required (aka row groups) - std::tie( - _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = - _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); - auto const rows_to_skip = _file_itm_data->rows_to_skip; - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; - - // If no rows or stripes to read, return empty columns - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - - query_stripe_compression_info(); -} - -void reader::impl::pass_preprocess() -{ - if (_file_itm_data->pass_preprocessed) { return; } - - _file_itm_data->pass_preprocessed = true; -} - -void reader::impl::subpass_preprocess() -{ - if (_file_itm_data->subpass_preprocessed) { return; } - - _file_itm_data->subpass_preprocessed = true; -} - void reader::impl::prepare_data(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) From 7abe0cadf9caef9661e038ee7eb8a30bf64ca2f3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 11:09:07 -0800 Subject: [PATCH 042/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ea763de3d4f..cc0fd46efaa 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -302,28 +302,22 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); - - // Prepare the buffer to read raw data onto. 
- for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stripe_data = lvl_stripe_data[level]; - auto& stripe_sizes = lvl_stripe_sizes[level]; - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { - stripe_data[stripe_idx] = rmm::device_buffer( - cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); - } - } } void reader::impl::pass_preprocess() { + auto const rows_to_read = _file_itm_data->rows_to_read; + auto const& selected_stripes = _file_itm_data->selected_stripes; + + // If no rows or stripes to read, return empty columns + if (rows_to_read == 0 || selected_stripes.empty()) { return; } + if (_file_itm_data->pass_preprocessed) { return; } _file_itm_data->pass_preprocessed = true; - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; - auto& read_info = _file_itm_data->stream_read_info; + auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; + auto& read_info = _file_itm_data->stream_read_info; std::size_t num_stripes = selected_stripes.size(); @@ -393,6 +387,12 @@ void reader::impl::pass_preprocess() void reader::impl::subpass_preprocess() { + auto const rows_to_read = _file_itm_data->rows_to_read; + auto const& selected_stripes = _file_itm_data->selected_stripes; + + // If no rows or stripes to read, return empty columns + if (rows_to_read == 0 || selected_stripes.empty()) { return; } + if (_file_itm_data->subpass_preprocessed) { return; } _file_itm_data->subpass_preprocessed = true; From ff8497783ecc02a693fbd63b0534d479378b946a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 11:16:21 -0800 Subject: [PATCH 043/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 15 ++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 3 +++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index cc0fd46efaa..bb1df98a6b3 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -171,6 +171,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, std::vector> const& stripes) { if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } + if (_file_itm_data->has_no_data()) { return; } if (_file_itm_data->global_preprocessed) { return; } // TODO: move this to end of func. @@ -184,9 +185,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; - // If no rows or stripes to read, return empty columns - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; lvl_stripe_data.resize(_selected_columns.num_levels()); @@ -204,6 +202,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Get the total number of stripes across all input files. std::size_t num_stripes = selected_stripes.size(); + // Prepare data. // Iterates through levels of nested columns, child column will be one level down // compared to parent column. 
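 // (orc_col_map, filled in below, maps each ORC column id to its cudf column
 // index per nesting level; entries are initialized to -1 for columns that
 // were not selected.)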
auto& col_meta = *_col_meta; @@ -306,12 +305,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, void reader::impl::pass_preprocess() { + if (_file_itm_data->has_no_data()) { return; } + auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; - // If no rows or stripes to read, return empty columns - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - if (_file_itm_data->pass_preprocessed) { return; } _file_itm_data->pass_preprocessed = true; @@ -387,12 +385,11 @@ void reader::impl::pass_preprocess() void reader::impl::subpass_preprocess() { + if (_file_itm_data->has_no_data()) { return; } + auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; - // If no rows or stripes to read, return empty columns - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - if (_file_itm_data->subpass_preprocessed) { return; } _file_itm_data->subpass_preprocessed = true; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 83a61703f20..8142f2bf270 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -104,6 +104,9 @@ struct chunk { * @brief Struct to store file-level data that remains constant for all chunks being read. */ struct file_intermediate_data { + // If no rows or stripes to read, return empty columns + bool has_no_data() const { return rows_to_read > 0 && selected_stripes.empty(); } + std::unordered_map compinfo_map; // bool compinfo_ready{false}; From ddffedd4457aa53035434c42a0b2445bee15740b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 11:32:59 -0800 Subject: [PATCH 044/321] Add chunk validation Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 44 ++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index bb1df98a6b3..050df586c66 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -108,7 +108,7 @@ std::size_t gather_stream_info(std::size_t stripe_index, } struct cumulative_size { - std::size_t count; + int64_t count; std::size_t size_bytes; }; @@ -164,6 +164,33 @@ std::vector find_splits(host_span sizes, } #endif +void verify_splits(host_span splits, + host_span sizes, + size_type total_count, + size_t size_limit) +{ + chunk last_split{0, 0}; + int64_t count{0}; + for (auto const& split : splits) { + CUDF_EXPECTS(split.count > 0, "Invalid split count."); + CUDF_EXPECTS(last_split.start_idx + last_split.count == split.start_idx, + "Invalid split start_idx."); + count += split.count; + last_split = split; + + if (split.count > 1) { + std::size_t size{0}; + for (int64_t i = split.start_idx; i < split.start_idx + split.count; ++i) { + size += sizes[i].size_bytes; + } + CUDF_EXPECTS(size < size_limit, "Chunk total size exceeds limit."); + } + } + CUDF_EXPECTS(last_split.start_idx + last_split.count == sizes[sizes.size() - 1].count, + "Invalid split start_idx."); + CUDF_EXPECTS(count == total_count, "Invalid total count."); +} + } // namespace void reader::impl::global_preprocess(uint64_t skip_rows, @@ -202,6 +229,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Get the total number of stripes across all input files. 
std::size_t num_stripes = selected_stripes.size(); + // TODO: Check if these data depends on pass and subpass, instead of global pass. // Prepare data. // Iterates through levels of nested columns, child column will be one level down // compared to parent column. @@ -289,7 +317,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, _file_itm_data->load_stripe_chunks = find_splits( total_stripe_sizes, - total_stripe_sizes.size(), + num_stripes, /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); auto& splits = _file_itm_data->load_stripe_chunks; @@ -301,6 +329,18 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); + + // We need to verify that: + // 1. All chunk must have count > 0 + // 2. Chunks are continuous. + // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe + // 4. sum(number of stripes in all chunks) == total_num_stripes. + // TODO: enable only in debug. + verify_splits( + splits, + total_stripe_sizes, + num_stripes, + /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); } void reader::impl::pass_preprocess() From 4efa52c28cabbdbac5b8aa756dae47b6039c1432 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 11:53:00 -0800 Subject: [PATCH 045/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 5 +++-- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 050df586c66..3555005f852 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -126,7 +126,7 @@ std::vector find_splits(host_span sizes, { std::vector splits; - uint32_t cur_count = 0; + int64_t cur_count = 0; int64_t cur_pos = 0; size_t cur_cumulative_size = 0; auto const start = thrust::make_transform_iterator( @@ -198,7 +198,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows, std::vector> const& stripes) { if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } - if (_file_itm_data->has_no_data()) { return; } if (_file_itm_data->global_preprocessed) { return; } // TODO: move this to end of func. 
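The four invariants listed above can be exercised on a toy input, reusing the stripes from the earlier splitting sketch. This snippet is illustrative only: plain asserts stand in for the CUDF_EXPECTS checks in verify_splits.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct cumulative_size { int64_t count; std::size_t size_bytes; };
struct chunk { int64_t start_idx; int64_t count; };

int main()
{
  // Inclusive prefix sums over three stripes of 10, 15 and 35 bytes.
  std::vector<cumulative_size> sizes{{1, 10}, {2, 25}, {3, 60}};
  std::vector<chunk> splits{{0, 2}, {2, 1}};  // splits for size_limit = 30
  std::size_t const size_limit = 30;

  int64_t consumed = 0;  // stripes covered by the chunks seen so far
  std::size_t cum  = 0;  // cumulative size up to the previous chunk's end
  for (auto const& s : splits) {
    assert(s.count > 0);              // (1) every chunk is non-empty
    assert(s.start_idx == consumed);  // (2) chunks are contiguous
    if (s.count > 1) {                // (3) multi-stripe chunks fit the limit
      assert(sizes[s.start_idx + s.count - 1].size_bytes - cum <= size_limit);
    }
    cum = sizes[s.start_idx + s.count - 1].size_bytes;
    consumed += s.count;
  }
  assert(consumed == 3);              // (4) chunks cover every stripe
  return 0;
}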
@@ -208,6 +207,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, std::tie( _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); + if (_file_itm_data->has_no_data()) { return; } + auto const rows_to_skip = _file_itm_data->rows_to_skip; auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 8142f2bf270..61d46d24025 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -105,7 +105,7 @@ struct chunk { */ struct file_intermediate_data { // If no rows or stripes to read, return empty columns - bool has_no_data() const { return rows_to_read > 0 && selected_stripes.empty(); } + bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } std::unordered_map compinfo_map; From 4ac494fe971ba1f0bf7e949019432a94bd01bc0d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 14:28:32 -0800 Subject: [PATCH 046/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 47 ++++++++++++++++++------- cpp/src/io/orc/reader_impl_chunking.hpp | 2 ++ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 3555005f852..ce729af8d13 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -171,6 +171,7 @@ void verify_splits(host_span splits, { chunk last_split{0, 0}; int64_t count{0}; + size_t cur_cumulative_size{0}; for (auto const& split : splits) { CUDF_EXPECTS(split.count > 0, "Invalid split count."); CUDF_EXPECTS(last_split.start_idx + last_split.count == split.start_idx, @@ -179,12 +180,28 @@ void verify_splits(host_span splits, last_split = split; if (split.count > 1) { - std::size_t size{0}; - for (int64_t i = split.start_idx; i < split.start_idx + split.count; ++i) { - size += sizes[i].size_bytes; + // printf("split: %ld - %ld, size: %zu, limit: %zu\n", + // split.start_idx, + // split.count, + // sizes[split.start_idx + split.count - 1].size_bytes - cur_cumulative_size, + // size_limit); + // fflush(stdout); + CUDF_EXPECTS( + sizes[split.start_idx + split.count - 1].size_bytes - cur_cumulative_size <= size_limit, + "Chunk total size exceeds limit."); + if (split.start_idx + split.count < total_count) { + // printf("wrong split: %ld - %ld, size: %zu, limit: %zu\n", + // split.start_idx, + // split.count + 1, + // sizes[split.start_idx + split.count].size_bytes - cur_cumulative_size, + // size_limit); + + CUDF_EXPECTS( + sizes[split.start_idx + split.count].size_bytes - cur_cumulative_size > size_limit, + "Invalid split."); } - CUDF_EXPECTS(size < size_limit, "Chunk total size exceeds limit."); } + cur_cumulative_size = sizes[split.start_idx + split.count - 1].size_bytes; } CUDF_EXPECTS(last_split.start_idx + last_split.count == sizes[sizes.size() - 1].count, "Invalid split start_idx."); @@ -316,11 +333,17 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); - _file_itm_data->load_stripe_chunks = find_splits( - total_stripe_sizes, - num_stripes, - /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); + // for (auto& size : total_stripe_sizes) { + // printf("size: %ld, %zu\n", size.count, 
size.size_bytes); + // } + auto limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; + + _file_itm_data->load_stripe_chunks = find_splits(total_stripe_sizes, + num_stripes, + /*chunk_size_limit/2*/ limit); + +#if 0 auto& splits = _file_itm_data->load_stripe_chunks; printf("------------\nSplits (/%d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { @@ -337,13 +360,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. - verify_splits( - splits, - total_stripe_sizes, - num_stripes, - /*chunk_size_limit/2*/ total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3); + verify_splits(splits, total_stripe_sizes, num_stripes, limit); +#endif } +// Load each chunk from `load_stripe_chunks`. void reader::impl::pass_preprocess() { if (_file_itm_data->has_no_data()) { return; } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 61d46d24025..5c8770655c9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -117,9 +117,11 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; + // Chunks of stripes that can be load such that total of their data size is within a limit. std::vector load_stripe_chunks; std::size_t curr_load_stripe_chunk{0}; + // Chunks of stripes such that total of their decompression size is within a limit. std::vector decode_stripe_chunks; std::size_t curr_decode_stripe_chunk{0}; From 71789814f436b7fbc39ee82a66f0508eb3f3b0e1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 15:03:38 -0800 Subject: [PATCH 047/321] Use limit Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 37 +++++++++++++++---------- cpp/src/io/orc/reader_impl_chunking.hpp | 3 ++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ce729af8d13..850e7d12e6a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -121,24 +121,28 @@ struct cumulative_size_sum { #if 1 std::vector find_splits(host_span sizes, - size_type total_count, + int64_t total_count, size_t size_limit) { + // if (size_limit == 0) { return {chunk{0, total_count}}; } + CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); + std::vector splits; + int64_t cur_count{0}; + int64_t cur_pos{0}; + size_t cur_cumulative_size{0}; - int64_t cur_count = 0; - int64_t cur_pos = 0; - size_t cur_cumulative_size = 0; - auto const start = thrust::make_transform_iterator( + auto const start = thrust::make_transform_iterator( sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); auto const end = start + static_cast(sizes.size()); - while (cur_count < static_cast(total_count)) { + + while (cur_count < total_count) { int64_t split_pos = thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit)); // If we're past the end, or if the returned bucket is bigger than the chunk_read_limit, move // back one. 
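 // (Worked example: with cumulative sizes {10, 25, 60} and size_limit = 30,
 // lower_bound returns the 60 bucket; since 60 - 0 > 30, split_pos steps back
 // one to the 25 bucket, ending the first chunk after the second stripe.)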
- if (static_cast(split_pos) >= sizes.size() || + if (static_cast(split_pos) >= sizes.size() || (sizes[split_pos].size_bytes - cur_cumulative_size > size_limit)) { split_pos--; } @@ -164,6 +168,7 @@ std::vector find_splits(host_span sizes, } #endif +#ifdef PRINT_DEBUG void verify_splits(host_span splits, host_span sizes, size_type total_count, @@ -207,6 +212,7 @@ void verify_splits(host_span splits, "Invalid split start_idx."); CUDF_EXPECTS(count == total_count, "Invalid total count."); } +#endif } // namespace @@ -323,6 +329,12 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; } + // Load all chunks if there is no read limit. + if (_file_itm_data->read_size_limit == 0) { + _file_itm_data->load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; + return; + } + // Compute the prefix sum of stripe data sizes. total_stripe_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), @@ -337,13 +349,10 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // printf("size: %ld, %zu\n", size.count, size.size_bytes); // } - auto limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; - - _file_itm_data->load_stripe_chunks = find_splits(total_stripe_sizes, - num_stripes, - /*chunk_size_limit/2*/ limit); + _file_itm_data->load_stripe_chunks = + find_splits(total_stripe_sizes, num_stripes, _file_itm_data->read_size_limit); -#if 0 +#ifdef PRINT_DEBUG auto& splits = _file_itm_data->load_stripe_chunks; printf("------------\nSplits (/%d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { @@ -360,7 +369,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. 
- verify_splits(splits, total_stripe_sizes, num_stripes, limit); + verify_splits(splits, total_stripe_sizes, num_stripes, _file_itm_data->read_size_limit); #endif } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 5c8770655c9..cd94240a208 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -154,6 +154,9 @@ struct file_intermediate_data { size_type rows_to_read; std::vector selected_stripes; + // TODO: Change this + std::size_t read_size_limit{0}; + bool global_preprocessed{false}; bool pass_preprocessed{false}; bool subpass_preprocessed{false}; From 8759a5451089fd7f62faa1479ee5920f331d9bad Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 17:48:57 -0800 Subject: [PATCH 048/321] Load data using chunk Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 80 +++++++++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 4 +- cpp/src/io/orc/reader_impl_preprocess.cu | 4 +- 3 files changed, 55 insertions(+), 33 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 850e7d12e6a..4f23614fa6f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -241,7 +241,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - auto& read_info = _file_itm_data->stream_read_info; + auto& read_info = _file_itm_data->stream_read_info; + auto& stripe_stream_read_chunks = _file_itm_data->stripe_stream_read_chunks; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map @@ -253,6 +254,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Get the total number of stripes across all input files. std::size_t num_stripes = selected_stripes.size(); + stripe_stream_read_chunks.resize(num_stripes); + // TODO: Check if these data depends on pass and subpass, instead of global pass. // Prepare data. // Iterates through levels of nested columns, child column will be one level down @@ -269,7 +272,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, col_meta.orc_col_map[level][col.id] = col_id++; } - lvl_stripe_data[level].resize(num_stripes); + auto& stripe_data = lvl_stripe_data[level]; + stripe_data.resize(num_stripes); auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -291,6 +295,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto const stripe_footer = stripe.stripe_footer; std::size_t total_stripe_size{0}; + auto const last_read_size = static_cast(read_info.size()); for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data->lvl_stream_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; @@ -327,8 +332,14 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } } total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; + stripe_stream_read_chunks[stripe_idx] = + chunk{last_read_size, static_cast(read_info.size() - last_read_size)}; } + // DEBUG only + _file_itm_data->read_size_limit = + total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; + // Load all chunks if there is no read limit. 
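 // (A limit of zero means unbounded: a single chunk covering all stripes.)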
if (_file_itm_data->read_size_limit == 0) { _file_itm_data->load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; @@ -381,51 +392,61 @@ void reader::impl::pass_preprocess() auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; - if (_file_itm_data->pass_preprocessed) { return; } - _file_itm_data->pass_preprocessed = true; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; auto& read_info = _file_itm_data->stream_read_info; - std::size_t num_stripes = selected_stripes.size(); - - // TODO: this is a pass + // std::size_t num_stripes = selected_stripes.size(); + auto const stripe_chunk = + _file_itm_data->load_stripe_chunks[_file_itm_data->curr_load_stripe_chunk++]; + auto const stripe_start = stripe_chunk.start_idx; + auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; // Prepare the buffer to read raw data onto. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stripe_data = lvl_stripe_data[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { stripe_data[stripe_idx] = rmm::device_buffer( cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); } } + auto const& stripe_stream_read_chunks = _file_itm_data->stripe_stream_read_chunks; + std::vector, std::size_t>> read_tasks; // Should not read all, but read stripe by stripe. // read_info should be limited by stripe. // Read level-by-level. // TODO: Test with read and parse/decode column by column. // This is future work. - for (auto const& read : read_info) { - auto& stripe_data = lvl_stripe_data[read.level]; - auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); - - if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( - read.length)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( - read.offset, read.length, dst_base + read.dst_pos, _stream), - read.length)); - - } else { - auto const buffer = - _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); - CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY(cudaMemcpyAsync( - dst_base + read.dst_pos, buffer->data(), read.length, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + auto const read_chunk = stripe_stream_read_chunks[stripe_idx]; + auto const read_begin = read_chunk.start_idx; + auto const read_end = read_chunk.start_idx + read_chunk.count; + + for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { + auto const& read = read_info[read_idx]; + auto& stripe_data = lvl_stripe_data[read.level]; + auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); + + if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( + read.length)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( + read.offset, read.length, dst_base + read.dst_pos, _stream), + read.length)); + + } else { + auto const buffer = + _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); + CUDF_EXPECTS(buffer->size() == read.length, 
"Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read.dst_pos, + buffer->data(), + read.length, + cudaMemcpyDefault, + _stream.value())); + _stream.synchronize(); #if 0 // This in theory should be faster, but in practice it's slower. Why? @@ -447,8 +468,10 @@ void reader::impl::pass_preprocess() }), read.length)); #endif + } } } + for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } @@ -461,9 +484,6 @@ void reader::impl::subpass_preprocess() auto const rows_to_read = _file_itm_data->rows_to_read; auto const& selected_stripes = _file_itm_data->selected_stripes; - if (_file_itm_data->subpass_preprocessed) { return; } - _file_itm_data->subpass_preprocessed = true; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; // TODO: This is subpass diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index cd94240a208..784cec0ca29 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -120,6 +120,7 @@ struct file_intermediate_data { // Chunks of stripes that can be load such that total of their data size is within a limit. std::vector load_stripe_chunks; std::size_t curr_load_stripe_chunk{0}; + bool more_stripe_to_load() { return curr_load_stripe_chunk < load_stripe_chunks.size(); } // Chunks of stripes such that total of their decompression size is within a limit. std::vector decode_stripe_chunks; @@ -149,6 +150,7 @@ struct file_intermediate_data { std::size_t level; }; std::vector stream_read_info; + std::vector stripe_stream_read_chunks; int64_t rows_to_skip; size_type rows_to_read; @@ -158,8 +160,6 @@ struct file_intermediate_data { std::size_t read_size_limit{0}; bool global_preprocessed{false}; - bool pass_preprocessed{false}; - bool subpass_preprocessed{false}; }; } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 0a99e831f8f..6bcf012d7e6 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -755,7 +755,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, if (_selected_columns.num_levels() == 0) { return; } global_preprocess(skip_rows, num_rows_opt, stripes); - pass_preprocess(); + while (_file_itm_data->more_stripe_to_load()) { + pass_preprocess(); + } subpass_preprocess(); auto const rows_to_skip = _file_itm_data->rows_to_skip; From d811e0f0088620e885df78585f7f6efe6cd7ff1b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 18:42:54 -0800 Subject: [PATCH 049/321] Parse stream by chunk Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 143 ++++++++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 6 +- cpp/src/io/orc/reader_impl_preprocess.cu | 9 +- 3 files changed, 100 insertions(+), 58 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 4f23614fa6f..7a126c8f653 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -243,6 +243,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto& read_info = _file_itm_data->stream_read_info; auto& stripe_stream_read_chunks = _file_itm_data->stripe_stream_read_chunks; + auto& lvl_stripe_stream_chunks = _file_itm_data->lvl_stripe_stream_chunks; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
std::unordered_map
 stream_compinfo_map;
+ // TODO: fix this, loop over only the current chunk
+ auto const stripe_chunk =
+ _file_itm_data->load_stripe_chunks[_file_itm_data->curr_load_stripe_chunk++];
+
 // Parse the decompressed sizes for each stripe.
for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data->lvl_stream_info[level]; @@ -500,67 +517,81 @@ void reader::impl::subpass_preprocess() auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), _stream); - - for (auto const& info : stream_info) { - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = - &compinfo[compinfo.size() - 1]; + auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + + auto const stripe_start = stripe_chunk.start_idx; + auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + auto const stream_chunk = stripe_stream_chunks[stripe_idx]; + auto const stream_start = stream_chunk.start_idx; + auto const stream_end = stream_chunk.start_idx + stream_chunk.count; + + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + cudf::detail::hostdevice_vector compinfo( + 0, /*stream_info.size()*/ stream_chunk.count, _stream); + + // TODO: Instead of all stream info, loop using read_chunk info to process + // only stream info of the curr_load_stripe_chunk. + + for (auto stream_idx = stream_start; stream_idx < stream_end; ++stream_idx) { + auto const& info = stream_info[stream_idx]; + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + stream_compinfo_map[stream_id_info{ + info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = + &compinfo[compinfo.size() - 1]; #ifdef PRINT_DEBUG - printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, - info.dst_pos, - info.length); - fflush(stdout); + printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.stripe_idx, + (int)info.level, + (int)info.orc_col_idx, + (int)info.kind, + info.dst_pos, + info.length); + fflush(stdout); #endif - } + } - compinfo.host_to_device_async(_stream); + compinfo.host_to_device_async(_stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - _stream); - compinfo.device_to_host_sync(_stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + _stream); + compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data->compinfo_map; - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; + auto& compinfo_map = _file_itm_data->compinfo_map; + for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, + 
stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size}; #ifdef PRINT_DEBUG - printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.stripe_idx, - (int)stream_id.level, - (int)stream_id.orc_col_idx, - (int)stream_id.kind, - (size_t)stream_compinfo->num_compressed_blocks, - (size_t)stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size); - fflush(stdout); + printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", + (int)stream_id.stripe_idx, + (int)stream_id.level, + (int)stream_id.orc_col_idx, + (int)stream_id.kind, + (size_t)stream_compinfo->num_compressed_blocks, + (size_t)stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size); + fflush(stdout); #endif - } + } - // Must clear so we will not overwrite the old compression info stream_id. - stream_compinfo_map.clear(); + // Must clear so we will not overwrite the old compression info stream_id. + stream_compinfo_map.clear(); - } else { - // printf("no compression \n"); - // fflush(stdout); + } else { + // printf("no compression \n"); + // fflush(stdout); - // Set decompressed data size equal to the input size. - // TODO + // Set decompressed data size equal to the input size. + // TODO + } } // printf(" end level %d\n\n", (int)level); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 784cec0ca29..b903948ecad 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -150,7 +150,11 @@ struct file_intermediate_data { std::size_t level; }; std::vector stream_read_info; - std::vector stripe_stream_read_chunks; + std::vector stripe_stream_read_chunks; // chunk identify the reading streams (multiple + // streams can be read once) for each stripe + std::vector> + lvl_stripe_stream_chunks; // chunk identify all processing streams for each stripe, need to be + // level-based int64_t rows_to_skip; size_type rows_to_read; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 6bcf012d7e6..1e619dea571 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -755,10 +755,17 @@ void reader::impl::prepare_data(uint64_t skip_rows, if (_selected_columns.num_levels() == 0) { return; } global_preprocess(skip_rows, num_rows_opt, stripes); + + // TODO: fix this, should be called once while (_file_itm_data->more_stripe_to_load()) { pass_preprocess(); } - subpass_preprocess(); + + // Fix this, subpass should be call once + _file_itm_data->curr_load_stripe_chunk = 0; + while (_file_itm_data->more_stripe_to_load()) { + subpass_preprocess(); + } auto const rows_to_skip = _file_itm_data->rows_to_skip; auto const rows_to_read = _file_itm_data->rows_to_read; From 23e14de58032950693c06eb0b90b3dfbd8e4b7b4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 19:57:05 -0800 Subject: [PATCH 050/321] Add interface functions Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index b903948ecad..f3e582266d7 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -125,6 +125,12 @@ struct file_intermediate_data { // Chunks of stripes such that total of their decompression size is within a limit. 
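 // (These complement load_stripe_chunks above: load chunks bound the raw bytes
 // read from the source per pass, while decode chunks bound the decompressed
 // footprint handled per decode step.)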
std::vector decode_stripe_chunks; std::size_t curr_decode_stripe_chunk{0}; + bool more_stripe_to_decode() { return curr_decode_stripe_chunk < decode_stripe_chunks.size(); } + + // Chunk of rows in the internal decoded table to output for each `read_chunk()`. + std::vector output_table_chunks; + std::size_t curr_output_table_chunk{0}; + bool more_table_chunk_to_output() { return curr_output_table_chunk < output_table_chunks.size(); } // Each read correspond to one or more consecutive stream combined. struct stream_read_info { From 55104115b420630f638f80b1ce158baa7f233146 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 22:11:30 -0800 Subject: [PATCH 051/321] Implement `chunk_read_data` struct Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/reader_impl.hpp | 5 +- cpp/src/io/orc/reader_impl_chunking.cu | 81 ++++++++++++------------ cpp/src/io/orc/reader_impl_chunking.hpp | 63 ++++++++++++------ cpp/src/io/orc/reader_impl_preprocess.cu | 22 +++---- 5 files changed, 100 insertions(+), 73 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index cf3121fe659..e5470df05a2 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -83,7 +83,7 @@ table_with_metadata reader::impl::read_chunk_internal() auto out_metadata = make_output_metadata(); // If no rows or stripes to read, return empty columns - if (_file_itm_data->rows_to_read == 0 || _file_itm_data->selected_stripes.empty()) { + if (_file_itm_data.has_no_data()) { std::transform(_selected_columns.levels[0].begin(), _selected_columns.levels[0].end(), std::back_inserter(out_columns), diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 1b5bef347d0..b2e22a16b85 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -17,6 +17,7 @@ #pragma once #include "aggregate_orc_metadata.hpp" +#include "reader_impl_chunking.hpp" #include @@ -33,7 +34,6 @@ namespace cudf::io::orc::detail { struct reader_column_meta; -struct file_intermediate_data; /** * @brief Implementation for ORC reader. @@ -116,7 +116,8 @@ class reader::impl { std::vector> const _sources; // Unused but owns data for `_metadata` aggregate_orc_metadata _metadata; column_hierarchy const _selected_columns; // Construct from `_metadata` thus declare after it - std::unique_ptr _file_itm_data; + file_intermediate_data _file_itm_data; + chunk_read_data _chunk_read_data; std::unique_ptr _output_metadata; std::vector> _out_buffers; }; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 7a126c8f653..767b21dd959 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -220,37 +220,36 @@ void reader::impl::global_preprocess(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) { - if (_file_itm_data == nullptr) { _file_itm_data = std::make_unique(); } - if (_file_itm_data->global_preprocessed) { return; } + if (_file_itm_data.global_preprocessed) { return; } // TODO: move this to end of func. 
- _file_itm_data->global_preprocessed = true; + _file_itm_data.global_preprocessed = true; // Select only stripes required (aka row groups) std::tie( - _file_itm_data->rows_to_skip, _file_itm_data->rows_to_read, _file_itm_data->selected_stripes) = + _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) = _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); - if (_file_itm_data->has_no_data()) { return; } + if (_file_itm_data.has_no_data()) { return; } - auto const rows_to_skip = _file_itm_data->rows_to_skip; - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; + // auto const rows_to_skip = _file_itm_data.rows_to_skip; + // auto const rows_to_read = _file_itm_data.rows_to_read; + auto const& selected_stripes = _file_itm_data.selected_stripes; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - auto& read_info = _file_itm_data->stream_read_info; - auto& stripe_stream_read_chunks = _file_itm_data->stripe_stream_read_chunks; - auto& lvl_stripe_stream_chunks = _file_itm_data->lvl_stripe_stream_chunks; + auto& read_info = _file_itm_data.stream_read_info; + auto& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. std::unordered_map stream_compinfo_map; // Logically view streams as columns - _file_itm_data->lvl_stream_info.resize(_selected_columns.num_levels()); + _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); // Get the total number of stripes across all input files. std::size_t num_stripes = selected_stripes.size(); @@ -277,7 +276,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto& stripe_data = lvl_stripe_data[level]; stripe_data.resize(num_stripes); - auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); auto& stripe_sizes = lvl_stripe_sizes[level]; stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown @@ -302,7 +301,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, std::size_t total_stripe_size{0}; auto const last_read_size = static_cast(read_info.size()); for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; auto stream_count = stream_info.size(); @@ -348,12 +347,12 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // DEBUG only - _file_itm_data->read_size_limit = + _chunk_read_data.read_size_limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; // Load all chunks if there is no read limit. 
- if (_file_itm_data->read_size_limit == 0) { - _file_itm_data->load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; + if (_chunk_read_data.read_size_limit == 0) { + _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; return; } @@ -371,11 +370,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // printf("size: %ld, %zu\n", size.count, size.size_bytes); // } - _file_itm_data->load_stripe_chunks = - find_splits(total_stripe_sizes, num_stripes, _file_itm_data->read_size_limit); + _chunk_read_data.load_stripe_chunks = + find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); #ifdef PRINT_DEBUG - auto& splits = _file_itm_data->load_stripe_chunks; + auto& splits = _file_itm_data.load_stripe_chunks; printf("------------\nSplits (/%d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); @@ -391,25 +390,25 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. - verify_splits(splits, total_stripe_sizes, num_stripes, _file_itm_data->read_size_limit); + verify_splits(splits, total_stripe_sizes, num_stripes, _file_itm_data.read_size_limit); #endif } // Load each chunk from `load_stripe_chunks`. void reader::impl::pass_preprocess() { - if (_file_itm_data->has_no_data()) { return; } + if (_file_itm_data.has_no_data()) { return; } - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; + // auto const rows_to_read = _file_itm_data.rows_to_read; + // auto const& selected_stripes = _file_itm_data.selected_stripes; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data->lvl_stripe_sizes; - auto& read_info = _file_itm_data->stream_read_info; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; + auto& read_info = _file_itm_data.stream_read_info; // std::size_t num_stripes = selected_stripes.size(); auto const stripe_chunk = - _file_itm_data->load_stripe_chunks[_file_itm_data->curr_load_stripe_chunk++]; + _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; @@ -423,7 +422,7 @@ void reader::impl::pass_preprocess() } } - auto const& stripe_stream_read_chunks = _file_itm_data->stripe_stream_read_chunks; + auto const& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; std::vector, std::size_t>> read_tasks; // Should not read all, but read stripe by stripe. 
@@ -491,13 +490,13 @@ void reader::impl::pass_preprocess() void reader::impl::subpass_preprocess() { - if (_file_itm_data->has_no_data()) { return; } + if (_file_itm_data.has_no_data()) { return; } - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; + // auto const rows_to_read = _file_itm_data.rows_to_read; + // auto const& selected_stripes = _file_itm_data.selected_stripes; - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& lvl_stripe_stream_chunks = _file_itm_data->lvl_stripe_stream_chunks; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; // TODO: This is subpass // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. @@ -506,11 +505,11 @@ void reader::impl::subpass_preprocess() // TODO: fix this, loop only current chunk auto const stripe_chunk = - _file_itm_data->load_stripe_chunks[_file_itm_data->curr_load_stripe_chunk++]; + _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; // Parse the decompressed sizes for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stream_info = _file_itm_data->lvl_stream_info[level]; + auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); // Tracker for eventually deallocating compressed and uncompressed data @@ -564,7 +563,7 @@ void reader::impl::subpass_preprocess() _stream); compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data->compinfo_map; + auto& compinfo_map = _file_itm_data.compinfo_map; for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, @@ -599,7 +598,7 @@ void reader::impl::subpass_preprocess() } // end loop level // lvl_stripe_data.clear(); - // _file_itm_data->compinfo_ready = true; + // _file_itm_data.compinfo_ready = true; } } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index f3e582266d7..e8d071aae57 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -100,6 +100,11 @@ struct chunk { int64_t count; }; +struct range { + int64_t begin; + int64_t end; +}; + /** * @brief Struct to store file-level data that remains constant for all chunks being read. */ @@ -117,21 +122,6 @@ struct file_intermediate_data { std::vector> lvl_data_chunks; std::vector> lvl_stream_info; - // Chunks of stripes that can be load such that total of their data size is within a limit. - std::vector load_stripe_chunks; - std::size_t curr_load_stripe_chunk{0}; - bool more_stripe_to_load() { return curr_load_stripe_chunk < load_stripe_chunks.size(); } - - // Chunks of stripes such that total of their decompression size is within a limit. - std::vector decode_stripe_chunks; - std::size_t curr_decode_stripe_chunk{0}; - bool more_stripe_to_decode() { return curr_decode_stripe_chunk < decode_stripe_chunks.size(); } - - // Chunk of rows in the internal decoded table to output for each `read_chunk()`. - std::vector output_table_chunks; - std::size_t curr_output_table_chunk{0}; - bool more_table_chunk_to_output() { return curr_output_table_chunk < output_table_chunks.size(); } - // Each read correspond to one or more consecutive stream combined. 
struct stream_read_info { stream_read_info(uint64_t offset_, @@ -166,10 +156,47 @@ struct file_intermediate_data { size_type rows_to_read; std::vector selected_stripes; - // TODO: Change this - std::size_t read_size_limit{0}; - bool global_preprocessed{false}; }; +/** + * @brief Struct to store all data necessary for chunked reading. + */ +struct chunk_read_data { + explicit chunk_read_data(std::size_t output_size_limit_ = 0, std::size_t read_size_limit_ = 0) + : output_size_limit{output_size_limit_}, read_size_limit(read_size_limit_) + { + } + + std::size_t output_size_limit; // Maximum size (in bytes) of an output chunk, or 0 for no limit + std::size_t read_size_limit; // Maximum size (in bytes) of an output chunk, or 0 for no limit + + // Chunks of stripes that can be load such that total of their data size is within a limit. + std::vector load_stripe_chunks; + std::size_t curr_load_stripe_chunk{0}; + bool more_stripe_to_load() const { return curr_load_stripe_chunk < load_stripe_chunks.size(); } + + // Chunks of stripes such that total of their decompression size is within a limit. + std::vector decode_stripe_chunks; + std::size_t curr_decode_stripe_chunk{0}; + bool more_stripe_to_decode() const + { + return curr_decode_stripe_chunk < decode_stripe_chunks.size(); + } + + // Chunk of rows in the internal decoded table to output for each `read_chunk()`. + std::vector output_table_chunks; + std::size_t curr_output_table_chunk{0}; + bool more_table_chunk_to_output() const + { + return curr_output_table_chunk < output_table_chunks.size(); + } + + // Only has more chunk to output if: + bool has_next() const + { + return more_stripe_to_load() || more_stripe_to_decode() || more_table_chunk_to_output(); + } +}; + } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 1e619dea571..c1f8b7a12df 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -757,19 +757,19 @@ void reader::impl::prepare_data(uint64_t skip_rows, global_preprocess(skip_rows, num_rows_opt, stripes); // TODO: fix this, should be called once - while (_file_itm_data->more_stripe_to_load()) { + while (_chunk_read_data.more_stripe_to_load()) { pass_preprocess(); } // Fix this, subpass should be call once - _file_itm_data->curr_load_stripe_chunk = 0; - while (_file_itm_data->more_stripe_to_load()) { + _chunk_read_data.curr_load_stripe_chunk = 0; + while (_chunk_read_data.more_stripe_to_load()) { subpass_preprocess(); } - auto const rows_to_skip = _file_itm_data->rows_to_skip; - auto const rows_to_read = _file_itm_data->rows_to_read; - auto const& selected_stripes = _file_itm_data->selected_stripes; + auto const rows_to_skip = _file_itm_data.rows_to_skip; + auto const rows_to_read = _file_itm_data.rows_to_read; + auto const& selected_stripes = _file_itm_data.selected_stripes; // If no rows or stripes to read, return empty columns if (rows_to_read == 0 || selected_stripes.empty()) { return; } @@ -788,9 +788,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, : std::make_unique(); }(); - auto& lvl_stripe_data = _file_itm_data->lvl_stripe_data; - auto& null_count_prefix_sums = _file_itm_data->null_count_prefix_sums; - auto& lvl_chunks = _file_itm_data->lvl_data_chunks; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; + auto& lvl_chunks = _file_itm_data.lvl_data_chunks; 
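// [Editor's note -- illustrative aside, not part of the patches] The progress flags that
// `chunk_read_data` introduces in this patch (more_stripe_to_load / more_stripe_to_decode /
// more_table_chunk_to_output / has_next) are meant to drive a chunked-read loop. A minimal
// sketch, assuming the `decompress_and_decode()` and `make_output_chunk()` stubs added a few
// patches later eventually do the real work (the function name `read_chunk_sketch` is
// hypothetical):
//
//   table_with_metadata reader::impl::read_chunk_sketch()
//   {
//     if (_chunk_read_data.more_stripe_to_load() && !_chunk_read_data.more_stripe_to_decode()) {
//       pass_preprocess();     // load the next chunk of stripes from the data sources
//       subpass_preprocess();  // parse compression info and plan the decode chunks
//     }
//     if (_chunk_read_data.more_stripe_to_decode() &&
//         !_chunk_read_data.more_table_chunk_to_output()) {
//       decompress_and_decode();  // materialize an internal decoded table
//     }
//     return make_output_chunk();  // slice the next output chunk out of that table
//   }
//
// A caller would keep invoking this while `_chunk_read_data.has_next()` returns true.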
lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_chunks.resize(_selected_columns.num_levels()); @@ -851,7 +851,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, (rows_to_skip == 0); // Logically view streams as columns - auto const& stream_info = _file_itm_data->lvl_stream_info[level]; + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; null_count_prefix_sums.emplace_back(); null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); @@ -978,7 +978,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = decompress_stripe_data(_file_itm_data->compinfo_map, + auto decomp_data = decompress_stripe_data(_file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, From 2dd88a0750c12aa8efd11163eef33e23b7b005ab Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 28 Jan 2024 22:24:29 -0800 Subject: [PATCH 052/321] Host read with sync just once Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 767b21dd959..166b4ea8264 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -424,7 +424,9 @@ void reader::impl::pass_preprocess() auto const& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; + std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; + // Should not read all, but read stripe by stripe. // read_info should be limited by stripe. // Read level-by-level. @@ -449,7 +451,7 @@ void reader::impl::pass_preprocess() read.length)); } else { - auto const buffer = + auto buffer = _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read.dst_pos, @@ -457,7 +459,8 @@ void reader::impl::pass_preprocess() read.length, cudaMemcpyDefault, _stream.value())); - _stream.synchronize(); + // _stream.synchronize(); + host_read_buffers.emplace_back(std::move(buffer)); #if 0 // This in theory should be faster, but in practice it's slower. Why? @@ -482,7 +485,7 @@ void reader::impl::pass_preprocess() } } } - + if (host_read_buffers.size() > 0) { _stream.synchronize(); } for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } From d2bb9111615955377f7f03b83a043e6ae6573f46 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 08:23:55 -0800 Subject: [PATCH 053/321] Simplify loops Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 187 ++++++++++++------------- 1 file changed, 88 insertions(+), 99 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 166b4ea8264..86fe621c702 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -427,40 +427,31 @@ void reader::impl::pass_preprocess() std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; - // Should not read all, but read stripe by stripe. - // read_info should be limited by stripe. - // Read level-by-level. - // TODO: Test with read and parse/decode column by column. - // This is future work. 
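// [Editor's note -- illustrative aside, not part of the patches] A worked example of why the
// flattened loop introduced below is equivalent to the per-stripe loop it replaces:
// `stripe_stream_read_chunks[s]` holds a contiguous chunk{start_idx, count} range into
// `read_info` for stripe `s`, and consecutive stripes occupy consecutive ranges. With the
// hypothetical values
//   stripe 2 -> chunk{3, 2}   // read_info entries [3, 5)
//   stripe 3 -> chunk{5, 4}   // read_info entries [5, 9)
// reading stripes [2, 4) collapses into the single contiguous range [3, 9), which is exactly
// what combining the first chunk's begin with the last chunk's end computes.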
- for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - auto const read_chunk = stripe_stream_read_chunks[stripe_idx]; - auto const read_begin = read_chunk.start_idx; - auto const read_end = read_chunk.start_idx + read_chunk.count; - - // TODO: instead of loop stripe => loop read, we can directly loop read of first + last stripe - for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { - auto const& read = read_info[read_idx]; - auto& stripe_data = lvl_stripe_data[read.level]; - auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); - - if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( - read.length)) { - read_tasks.push_back( - std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( - read.offset, read.length, dst_base + read.dst_pos, _stream), - read.length)); - - } else { - auto buffer = - _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); - CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read.dst_pos, - buffer->data(), - read.length, - cudaMemcpyDefault, - _stream.value())); - // _stream.synchronize(); - host_read_buffers.emplace_back(std::move(buffer)); + auto const stripe_first_chunk = stripe_stream_read_chunks[stripe_start]; + auto const stripe_last_chunk = stripe_stream_read_chunks[stripe_end - 1]; + auto const read_begin = stripe_first_chunk.start_idx; + auto const read_end = stripe_last_chunk.start_idx + stripe_last_chunk.count; + + for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { + auto const& read = read_info[read_idx]; + auto& stripe_data = lvl_stripe_data[read.level]; + auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); + + if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( + read.length)) { + read_tasks.push_back( + std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( + read.offset, read.length, dst_base + read.dst_pos, _stream), + read.length)); + + } else { + auto buffer = + _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); + CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY(cudaMemcpyAsync( + dst_base + read.dst_pos, buffer->data(), read.length, cudaMemcpyDefault, _stream.value())); + // _stream.synchronize(); + host_read_buffers.emplace_back(std::move(buffer)); #if 0 // This in theory should be faster, but in practice it's slower. Why? 
@@ -482,9 +473,9 @@ void reader::impl::pass_preprocess() }), read.length)); #endif - } } } + if (host_read_buffers.size() > 0) { _stream.synchronize(); } for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); @@ -520,80 +511,78 @@ void reader::impl::subpass_preprocess() if (stripe_data.empty()) { continue; } auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - - auto const stripe_start = stripe_chunk.start_idx; - auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; - for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - auto const stream_chunk = stripe_stream_chunks[stripe_idx]; - auto const stream_start = stream_chunk.start_idx; - auto const stream_end = stream_chunk.start_idx + stream_chunk.count; - - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo( - 0, /*stream_info.size()*/ stream_chunk.count, _stream); - - // TODO: Instead of all stream info, loop using read_chunk info to process - // only stream info of the curr_load_stripe_chunk. - - for (auto stream_idx = stream_start; stream_idx < stream_end; ++stream_idx) { - auto const& info = stream_info[stream_idx]; - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, - info.length)); - stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = - &compinfo[compinfo.size() - 1]; + auto const stripe_start = stripe_chunk.start_idx; + auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + auto const stripe_first_chunk = stripe_stream_chunks[stripe_start]; + auto const stripe_last_chunk = stripe_stream_chunks[stripe_end - 1]; + auto const stream_begin = stripe_first_chunk.start_idx; + auto const stream_end = stripe_last_chunk.start_idx + stripe_last_chunk.count; + auto const num_streams = stream_end - stream_begin; + + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); + + // TODO: Instead of all stream info, loop using read_chunk info to process + // only stream info of the curr_load_stripe_chunk. 
+ + for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { + auto const& info = stream_info[stream_idx]; + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + info.length)); + stream_compinfo_map[stream_id_info{ + info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = + &compinfo[compinfo.size() - 1]; #ifdef PRINT_DEBUG - printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, - info.dst_pos, - info.length); - fflush(stdout); + printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.stripe_idx, + (int)info.level, + (int)info.orc_col_idx, + (int)info.kind, + info.dst_pos, + info.length); + fflush(stdout); #endif - } + } - compinfo.host_to_device_async(_stream); + compinfo.host_to_device_async(_stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - _stream); - compinfo.device_to_host_sync(_stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + _stream); + compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data.compinfo_map; - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; + auto& compinfo_map = _file_itm_data.compinfo_map; + for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, + stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size}; #ifdef PRINT_DEBUG - printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.stripe_idx, - (int)stream_id.level, - (int)stream_id.orc_col_idx, - (int)stream_id.kind, - (size_t)stream_compinfo->num_compressed_blocks, - (size_t)stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size); - fflush(stdout); + printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", + (int)stream_id.stripe_idx, + (int)stream_id.level, + (int)stream_id.orc_col_idx, + (int)stream_id.kind, + (size_t)stream_compinfo->num_compressed_blocks, + (size_t)stream_compinfo->num_uncompressed_blocks, + stream_compinfo->max_uncompressed_size); + fflush(stdout); #endif - } + } - // Must clear so we will not overwrite the old compression info stream_id. - stream_compinfo_map.clear(); + // Must clear so we will not overwrite the old compression info stream_id. + stream_compinfo_map.clear(); - } else { - // printf("no compression \n"); - // fflush(stdout); + } else { + // printf("no compression \n"); + // fflush(stdout); - // Set decompressed data size equal to the input size. - // TODO - } + // Set decompressed data size equal to the input size. 
+ // TODO } // printf(" end level %d\n\n", (int)level); From a3dff8f70cf8e99f8239cd7b66bb88ded89bdc25 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 08:56:11 -0800 Subject: [PATCH 054/321] Add comments and change variable name Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 4 +++ cpp/src/io/orc/reader_impl_chunking.hpp | 7 ++++- cpp/src/io/orc/reader_impl_preprocess.cu | 37 ++++++++++++------------ 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 86fe621c702..b987cb3d8cf 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -522,6 +522,9 @@ void reader::impl::subpass_preprocess() // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; + + // Cannot be cached, since this is for streams in a loaded stripe chunk, while + // the latter decoding step will use a different stripe chunk. cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); // TODO: Instead of all stream info, loop using read_chunk info to process @@ -558,6 +561,7 @@ void reader::impl::subpass_preprocess() auto& compinfo_map = _file_itm_data.compinfo_map; for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + // Cache these parsed numbers so they can be reused in the decoding step. compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index e8d071aae57..ea2c1d2cd81 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -116,8 +116,13 @@ struct file_intermediate_data { compinfo_map; // bool compinfo_ready{false}; - std::vector> lvl_stripe_sizes; + // The buffers are initialized for each reading stripe chunks. + // After decoding, such buffers need to be released. + // This can only be implemented after chunked output is ready. 
std::vector> lvl_stripe_data; + + std::vector> lvl_stripe_sizes; + std::vector>> null_count_prefix_sums; std::vector> lvl_data_chunks; std::vector> lvl_stream_info; diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index c1f8b7a12df..3333af1ac97 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -174,6 +174,7 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo( 0, stream_info.size(), stream); + for (auto const& info : stream_info) { #ifdef PRINT_DEBUG printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", @@ -756,6 +757,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, global_preprocess(skip_rows, num_rows_opt, stripes); + if (_file_itm_data.has_no_data()) { return; } + // TODO: fix this, should be called once while (_chunk_read_data.more_stripe_to_load()) { pass_preprocess(); @@ -771,9 +774,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; - // If no rows or stripes to read, return empty columns - if (rows_to_read == 0 || selected_stripes.empty()) { return; } - // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { auto const has_timestamp_column = std::any_of( @@ -791,11 +791,13 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - lvl_stripe_data.resize(_selected_columns.num_levels()); - lvl_chunks.resize(_selected_columns.num_levels()); + // TODO: move this to global step + lvl_chunks.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); + std::size_t num_stripes = selected_stripes.size(); + // Iterates through levels of nested columns, child column will be one level down // compared to parent column. auto& col_meta = *_col_meta; @@ -830,12 +832,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, } } - // Get the total number of stripes across all input files. 
- std::size_t total_num_stripes = selected_stripes.size(); - auto const num_columns = columns_level.size(); - auto& chunks = lvl_chunks[level]; - chunks = - cudf::detail::hostdevice_2dvector(total_num_stripes, num_columns, _stream); + auto const num_columns = columns_level.size(); + auto& chunks = lvl_chunks[level]; + chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = @@ -845,7 +844,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, // Only use if we don't have much work with complete columns & stripes // TODO: Consider nrows, gpu, and tune the threshold (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && + _metadata.get_row_index_stride() > 0 && num_columns * num_stripes < 8 * 128) && // Only use if first row is aligned to a stripe boundary // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); @@ -859,7 +858,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, _selected_columns.levels[level].size(), [&]() { return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, _stream, rmm::mr::get_current_device_resource()); + num_stripes, _stream, rmm::mr::get_current_device_resource()); }); // Tracker for eventually deallocating compressed and uncompressed data @@ -868,8 +867,10 @@ void reader::impl::prepare_data(uint64_t skip_rows, std::size_t stripe_start_row = 0; std::size_t num_dict_entries = 0; std::size_t num_rowgroups = 0; - std::size_t stripe_idx = 0; - std::size_t stream_idx = 0; + + // TODO: Stripe and stream idx must be by chunk. + std::size_t stripe_idx = 0; + std::size_t stream_idx = 0; // std::vector, std::size_t>> read_tasks; for (auto const& stripe : selected_stripes) { @@ -984,7 +985,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, stream_info, chunks, row_groups, - total_num_stripes, + num_stripes, _metadata.get_row_index_stride(), level == 0, _stream); @@ -999,7 +1000,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, nullptr, chunks.base_device_ptr(), num_columns, - total_num_stripes, + num_stripes, num_rowgroups, _metadata.get_row_index_stride(), level == 0, @@ -1009,7 +1010,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; - for (std::size_t j = 0; j < total_num_stripes; ++j) { + for (std::size_t j = 0; j < num_stripes; ++j) { if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { is_nullable = true; break; From 294ad503f10374296fc41a242d2235a56c8aca57 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 09:02:00 -0800 Subject: [PATCH 055/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++++ cpp/src/io/orc/reader_impl.hpp | 4 ++++ cpp/src/io/orc/reader_impl_chunking.cu | 21 --------------------- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index e5470df05a2..4ee25fdab70 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -115,6 +115,10 @@ table_with_metadata reader::impl::read_chunk_internal() return {std::make_unique(std::move(out_columns)), std::move(out_metadata)}; } +void reader::impl::decompress_and_decode() {} + +table_with_metadata reader::impl::make_output_chunk() { return table_with_metadata{}; } + // Forward to implementation 
reader::reader(std::vector>&& sources, orc_reader_options const& options, diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b2e22a16b85..d7653e3e180 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -86,6 +86,10 @@ class reader::impl { void subpass_preprocess(); + void decompress_and_decode(); + + table_with_metadata make_output_chunk(); + /** * @brief Create the output table metadata from file metadata. * diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index b987cb3d8cf..75485cdd711 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -452,27 +452,6 @@ void reader::impl::pass_preprocess() dst_base + read.dst_pos, buffer->data(), read.length, cudaMemcpyDefault, _stream.value())); // _stream.synchronize(); host_read_buffers.emplace_back(std::move(buffer)); - -#if 0 - // This in theory should be faster, but in practice it's slower. Why? - read_tasks.push_back( - std::pair(std::async(std::launch::async, - [&, read = read, dst_base = dst_base] { - auto const buffer = - _metadata.per_file_metadata[read.source_idx].source->host_read( - read.offset, read.length); - CUDF_EXPECTS(buffer->size() == read.length, - "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read.dst_pos, - buffer->data(), - read.length, - cudaMemcpyDefault, - _stream.value())); - _stream.synchronize(); - return read.length; - }), - read.length)); -#endif } } From 5b00dac5c5a0f3fe8175f2d7caf8b58a2631c91e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 19:20:50 -0800 Subject: [PATCH 056/321] Extract common code Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 37 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 75485cdd711..9407a7c3d24 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -214,6 +214,24 @@ void verify_splits(host_span splits, } #endif +std::pair get_range(std::vector const& input_chunks, + chunk const& selected_chunks) +{ + // Range indices to input_chunks + auto const chunk_begin = selected_chunks.start_idx; + auto const chunk_end = selected_chunks.start_idx + selected_chunks.count; + + // The first and last chunk, according to selected_chunk + auto const& first_chunk = input_chunks[chunk_begin]; + auto const& last_chunk = input_chunks[chunk_end - 1]; + + // The range of data covered from the first to the last chunk. 
+ auto const begin = first_chunk.start_idx; + auto const end = last_chunk.start_idx + last_chunk.count; + + return {begin, end}; +} + } // namespace void reader::impl::global_preprocess(uint64_t skip_rows, @@ -422,15 +440,11 @@ void reader::impl::pass_preprocess() } } - auto const& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; - std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; - auto const stripe_first_chunk = stripe_stream_read_chunks[stripe_start]; - auto const stripe_last_chunk = stripe_stream_read_chunks[stripe_end - 1]; - auto const read_begin = stripe_first_chunk.start_idx; - auto const read_end = stripe_last_chunk.start_idx + stripe_last_chunk.count; + auto const& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; + auto const [read_begin, read_end] = get_range(stripe_stream_read_chunks, stripe_chunk); for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read = read_info[read_idx]; @@ -489,14 +503,9 @@ void reader::impl::subpass_preprocess() auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } - auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - auto const stripe_start = stripe_chunk.start_idx; - auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; - auto const stripe_first_chunk = stripe_stream_chunks[stripe_start]; - auto const stripe_last_chunk = stripe_stream_chunks[stripe_end - 1]; - auto const stream_begin = stripe_first_chunk.start_idx; - auto const stream_end = stripe_last_chunk.start_idx + stripe_last_chunk.count; - auto const num_streams = stream_end - stream_begin; + auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); + auto const num_streams = stream_end - stream_begin; // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { From 4e2aec15a9c8ce4d016087d41194f7ee45d769d9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 20:18:07 -0800 Subject: [PATCH 057/321] Split chunks by decompression size Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 80 +++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9407a7c3d24..4efaf26e402 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -108,8 +108,8 @@ std::size_t gather_stream_info(std::size_t stripe_index, } struct cumulative_size { - int64_t count; - std::size_t size_bytes; + int64_t count{0}; + std::size_t size_bytes{0}; }; struct cumulative_size_sum { @@ -214,6 +214,13 @@ void verify_splits(host_span splits, } #endif +/** + * @brief + * + * @param input_chunks + * @param selected_chunks + * @return + */ std::pair get_range(std::vector const& input_chunks, chunk const& selected_chunks) { @@ -391,8 +398,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); -#ifdef PRINT_DEBUG - auto& splits = _file_itm_data.load_stripe_chunks; +#ifndef PRINT_DEBUG + auto& splits = _chunk_read_data.load_stripe_chunks; printf("------------\nSplits (/%d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); @@ -408,7 
+415,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. - verify_splits(splits, total_stripe_sizes, num_stripes, _file_itm_data.read_size_limit); +// verify_splits(splits, total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); #endif } @@ -480,7 +487,6 @@ void reader::impl::subpass_preprocess() if (_file_itm_data.has_no_data()) { return; } // auto const rows_to_read = _file_itm_data.rows_to_read; - // auto const& selected_stripes = _file_itm_data.selected_stripes; auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; @@ -494,6 +500,9 @@ void reader::impl::subpass_preprocess() auto const stripe_chunk = _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; + cudf::detail::hostdevice_vector stripe_decompression_sizes(stripe_chunk.count, + _stream); + // Parse the decompressed sizes for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data.lvl_stream_info[level]; @@ -553,6 +562,10 @@ void reader::impl::subpass_preprocess() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; + stripe_decompression_sizes[stream_id.stripe_idx - stripe_chunk.start_idx] = { + 1, + stripe_decompression_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes + + stream_compinfo->max_uncompressed_size}; #ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, @@ -573,14 +586,65 @@ void reader::impl::subpass_preprocess() // printf("no compression \n"); // fflush(stdout); - // Set decompressed data size equal to the input size. - // TODO + // Set decompression size equal to the input size. + for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { + auto const& info = stream_info[stream_idx]; + stripe_decompression_sizes[info.stripe_idx - stripe_chunk.start_idx] = { + 1, + stripe_decompression_sizes[info.stripe_idx - stripe_chunk.start_idx].size_bytes + + info.length}; + } } // printf(" end level %d\n\n", (int)level); } // end loop level + // Compute the prefix sum of stripe data sizes. 
+ stripe_decompression_sizes.host_to_device_async(_stream); + thrust::inclusive_scan(rmm::exec_policy(_stream), + stripe_decompression_sizes.d_begin(), + stripe_decompression_sizes.d_end(), + stripe_decompression_sizes.d_begin(), + cumulative_size_sum{}); + + stripe_decompression_sizes.device_to_host_sync(_stream); + + // DEBUG only + _chunk_read_data.read_size_limit = + stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3; + + _chunk_read_data.decode_stripe_chunks = + find_splits(stripe_decompression_sizes, stripe_chunk.count, _chunk_read_data.read_size_limit); + for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { + chunk.start_idx += stripe_chunk.start_idx; + } + + for (auto& size : stripe_decompression_sizes) { + printf("size: %ld, %zu\n", size.count, size.size_bytes); + } + +#ifndef PRINT_DEBUG + auto& splits = _chunk_read_data.decode_stripe_chunks; + printf("------------\nSplits second level (/%d): \n", (int)stripe_chunk.count); + for (size_t idx = 0; idx < splits.size(); idx++) { + printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + } + fflush(stdout); + + // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; + // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); + + // We need to verify that: + // 1. All chunk must have count > 0 + // 2. Chunks are continuous. + // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe + // 4. sum(number of stripes in all chunks) == total_num_stripes. + // TODO: enable only in debug. +// verify_splits(splits, stripe_decompression_sizes, stripe_chunk.count, +// _file_itm_data.read_size_limit); +#endif + // lvl_stripe_data.clear(); // _file_itm_data.compinfo_ready = true; } From 33d304a39501cfee6edf9f6900903b1e39f3c071 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 20:27:50 -0800 Subject: [PATCH 058/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 4efaf26e402..d01039b7a62 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -371,14 +371,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, chunk{last_read_size, static_cast(read_info.size() - last_read_size)}; } - // DEBUG only - _chunk_read_data.read_size_limit = - total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; - // Load all chunks if there is no read limit. if (_chunk_read_data.read_size_limit == 0) { _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; - return; + // TODO: DEBUG only + // return; } // Compute the prefix sum of stripe data sizes. 
@@ -391,9 +388,13 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); - // for (auto& size : total_stripe_sizes) { - // printf("size: %ld, %zu\n", size.count, size.size_bytes); - // } + for (auto& size : total_stripe_sizes) { + printf("size: %ld, %zu\n", size.count, size.size_bytes); + } + + // DEBUG only + _chunk_read_data.read_size_limit = + total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); From b4b286a87a01fe41cae5294af49b239d09d17388 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 20:40:17 -0800 Subject: [PATCH 059/321] Implement chunk merging Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d01039b7a62..ad5bfdd398b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -164,6 +164,17 @@ std::vector find_splits(host_span sizes, cur_cumulative_size = sizes[split_pos].size_bytes; } + // If the last chunk's element count is smaller than `merge_threshold` (a fraction) of the + // second last one's, merge it into the second last one. + if (splits.size() > 1) { + auto constexpr merge_threshold = 0.15; + if (auto const last = splits.back(), second_last = splits[splits.size() - 2]; + last.count <= static_cast(merge_threshold * second_last.count)) { + splits.pop_back(); + splits.back().count += last.count; + } + } + return splits; } #endif @@ -601,6 +612,13 @@ void reader::impl::subpass_preprocess() } // end loop level + // Decode all chunks if there is no read limit. + if (_chunk_read_data.read_size_limit == 0) { + _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; + // TODO: DEBUG only + // return; + } + // Compute the prefix sum of stripe data sizes.
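// [Editor's note -- illustrative aside, not part of the patches] A worked example of the merge
// step added to find_splits() above: given splits chunk{0, 20} and chunk{20, 2}, the trailing
// split satisfies 2 <= 0.15 * 20 = 3, so it is popped and absorbed, leaving the single split
// chunk{0, 22}. This keeps the reader from scheduling a tiny trailing chunk.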
stripe_decompression_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), @@ -612,8 +630,8 @@ void reader::impl::subpass_preprocess() stripe_decompression_sizes.device_to_host_sync(_stream); // DEBUG only - _chunk_read_data.read_size_limit = - stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3; + // _chunk_read_data.read_size_limit = + // stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3; _chunk_read_data.decode_stripe_chunks = find_splits(stripe_decompression_sizes, stripe_chunk.count, _chunk_read_data.read_size_limit); @@ -622,7 +640,7 @@ void reader::impl::subpass_preprocess() } for (auto& size : stripe_decompression_sizes) { - printf("size: %ld, %zu\n", size.count, size.size_bytes); + printf("decomp size: %ld, %zu\n", size.count, size.size_bytes); } #ifndef PRINT_DEBUG From 2e81db13cd6aa05412209642429e1dcf5c9d0b41 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 20:57:49 -0800 Subject: [PATCH 060/321] Rename variable Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 29 +++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ad5bfdd398b..ff9dc6cd3ef 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -512,8 +512,8 @@ void reader::impl::subpass_preprocess() auto const stripe_chunk = _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; - cudf::detail::hostdevice_vector stripe_decompression_sizes(stripe_chunk.count, - _stream); + cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, _stream); + std::fill(stripe_decomp_sizes.begin(), stripe_decomp_sizes.end(), cumulative_size{1, 0}); // Parse the decompressed sizes for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { @@ -574,10 +574,8 @@ void reader::impl::subpass_preprocess() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; - stripe_decompression_sizes[stream_id.stripe_idx - stripe_chunk.start_idx] = { - 1, - stripe_decompression_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes + - stream_compinfo->max_uncompressed_size}; + stripe_decomp_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes += + stream_compinfo->max_uncompressed_size; #ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, @@ -601,10 +599,7 @@ void reader::impl::subpass_preprocess() // Set decompression size equal to the input size. for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - stripe_decompression_sizes[info.stripe_idx - stripe_chunk.start_idx] = { - 1, - stripe_decompression_sizes[info.stripe_idx - stripe_chunk.start_idx].size_bytes + - info.length}; + stripe_decomp_sizes[info.stripe_idx - stripe_chunk.start_idx].size_bytes += info.length; } } @@ -620,26 +615,26 @@ void reader::impl::subpass_preprocess() } // Compute the prefix sum of stripe data sizes. 
- stripe_decompression_sizes.host_to_device_async(_stream); + stripe_decomp_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), - stripe_decompression_sizes.d_begin(), - stripe_decompression_sizes.d_end(), - stripe_decompression_sizes.d_begin(), + stripe_decomp_sizes.d_begin(), + stripe_decomp_sizes.d_end(), + stripe_decomp_sizes.d_begin(), cumulative_size_sum{}); - stripe_decompression_sizes.device_to_host_sync(_stream); + stripe_decomp_sizes.device_to_host_sync(_stream); // DEBUG only // _chunk_read_data.read_size_limit = // stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3; _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decompression_sizes, stripe_chunk.count, _chunk_read_data.read_size_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.read_size_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } - for (auto& size : stripe_decompression_sizes) { + for (auto& size : stripe_decomp_sizes) { printf("decomp size: %ld, %zu\n", size.count, size.size_bytes); } From 3260ecc5a1d4e27ab5ff0d0990000896ec6a3d38 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Jan 2024 21:35:41 -0800 Subject: [PATCH 061/321] Find a test that can benefit from subpass Signed-off-by: Nghia Truong --- cpp/tests/io/orc_test.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 5ff27bd9e10..a0a0cecb038 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -225,16 +225,17 @@ struct SkipRowTest { TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) { - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); - constexpr auto num_rows = 100; + constexpr auto num_rows = 10000000; column_wrapper col(sequence, sequence + num_rows); table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::SNAPPY); cudf::io::write_orc(out_opts); cudf::io::orc_reader_options in_opts = From 2dca1eb9d2d7049a04f1b5549594e0c482ce3efa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Feb 2024 13:28:05 +0700 Subject: [PATCH 062/321] Reorganize, rename, and add docs for variables --- cpp/src/io/orc/reader_impl_chunking.cu | 14 +- cpp/src/io/orc/reader_impl_chunking.hpp | 174 ++++++++++++++--------- cpp/src/io/orc/reader_impl_preprocess.cu | 4 +- 3 files changed, 119 insertions(+), 73 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ff9dc6cd3ef..2463196dc31 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -276,8 +276,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - auto& read_info = _file_itm_data.stream_read_info; - auto& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; + auto& read_info = _file_itm_data.data_read_info; + auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; auto& 
lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. @@ -290,7 +290,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Get the total number of stripes across all input files. std::size_t num_stripes = selected_stripes.size(); - stripe_stream_read_chunks.resize(num_stripes); + stripe_data_read_chunks.resize(num_stripes); lvl_stripe_stream_chunks.resize(_selected_columns.num_levels()); // TODO: Check if these data depends on pass and subpass, instead of global pass. @@ -378,7 +378,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } } total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; - stripe_stream_read_chunks[stripe_idx] = + stripe_data_read_chunks[stripe_idx] = chunk{last_read_size, static_cast(read_info.size() - last_read_size)}; } @@ -441,7 +441,7 @@ void reader::impl::pass_preprocess() auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; - auto& read_info = _file_itm_data.stream_read_info; + auto& read_info = _file_itm_data.data_read_info; // std::size_t num_stripes = selected_stripes.size(); auto const stripe_chunk = @@ -462,8 +462,8 @@ void reader::impl::pass_preprocess() std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; - auto const& stripe_stream_read_chunks = _file_itm_data.stripe_stream_read_chunks; - auto const [read_begin, read_end] = get_range(stripe_stream_read_chunks, stripe_chunk); + auto const& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; + auto const [read_begin, read_end] = get_range(stripe_data_read_chunks, stripe_chunk); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index ea2c1d2cd81..1e5d0a3d988 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -31,9 +31,45 @@ namespace cudf::io::orc::detail { /** - * @brief Struct that maps ORC streams to columns + * @brief Struct that stores the identification of an ORC stream + */ +struct stream_id_info { + uint32_t stripe_idx; // TODO: check if this is correct stripe processing index, not stripe index in source + // TODO: change type below + std::size_t level; // level of the nested column + uint32_t orc_col_idx; // orc column id + StreamKind kind; // stream kind + + struct hash { + std::size_t operator()(stream_id_info const& index) const + { + auto const hasher = std::hash{}; + return hasher(index.stripe_idx) ^ hasher(index.level) ^ + hasher(static_cast(index.orc_col_idx)) ^ + hasher(static_cast(index.kind)); + } + }; + struct equal_to { + bool operator()(stream_id_info const& lhs, stream_id_info const& rhs) const + { + return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && + lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; + } + }; +}; + +/** + * @brief Map to look up a value from a stream id. +*/ +template +using stream_id_map = + std::unordered_map; + +/** + * @brief Struct that stores information about an ORC stream.
*/ struct orc_stream_info { + // TODO: remove constructor explicit orc_stream_info(uint64_t offset_, std::size_t dst_pos_, uint32_t length_, @@ -57,79 +93,72 @@ struct orc_stream_info { (int)kind); #endif } - uint64_t offset; // offset in file - std::size_t dst_pos; // offset in memory relative to start of compressed stripe data - std::size_t length; // length in file - uint32_t stripe_idx; // stripe processing index, not stripe index in source - std::size_t level; // TODO - uint32_t orc_col_idx; - StreamKind kind; -}; + // Data info: + uint64_t offset; // offset in data source + std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data + std::size_t length; // stream length to read -// unify this with orc_stream_info -struct stream_id_info { - std::size_t stripe_idx; - std::size_t level; - uint32_t orc_col_idx; - StreamKind kind; + // Store location of the stream in the stripe, so we can look up where this stream comes from. + stream_id_info id; }; + +/** + * @brief Struct that stores compression information for a stripe at a specific nested level. + */ struct stripe_level_comp_info { std::size_t num_compressed_blocks{0}; std::size_t num_uncompressed_blocks{0}; std::size_t total_decomp_size{0}; }; -struct stream_id_equal { - bool operator()(stream_id_info const& lhs, stream_id_info const& rhs) const - { - return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && - lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; - } -}; -struct stream_id_hash { - std::size_t operator()(stream_id_info const& index) const - { - auto const hasher = std::hash{}; - return hasher(index.stripe_idx) ^ hasher(index.level) ^ - hasher(static_cast(index.orc_col_idx)) ^ - hasher(static_cast(index.kind)); - } -}; +// TODO: remove this and use range instead /** + * @brief Struct that stores information about a chunk of data. + */ struct chunk { int64_t start_idx; int64_t count; }; +/** + * @brief Struct that stores information about a range of data. + */ struct range { int64_t begin; int64_t end; }; /** - * @brief Struct to store file-level data that remains constant for all chunks being read. + * @brief Struct to store file-level data that remains constant for all chunks being output. */ struct file_intermediate_data { - // If no rows or stripes to read, return empty columns + int64_t rows_to_skip; + size_type rows_to_read; + std::vector selected_stripes; + + + // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } - std::unordered_map - compinfo_map; - // bool compinfo_ready{false}; + // Store the compression information for each data stream. + stream_id_map compinfo_map; - // The buffers are initialized for each reading stripe chunks. - // After decoding, such buffers need to be released. - // This can only be implemented after chunked output is ready. + // The buffers to store raw data read from disk, initialized for each stripe chunk being read. + // After decoding, such buffers can be released. + // This can only be implemented after chunked output is ready. std::vector> lvl_stripe_data; + // Store the size of each stripe at each nested level. + // This is used to initialize the stripe_data buffers. std::vector> lvl_stripe_sizes; - std::vector>> null_count_prefix_sums; - std::vector> lvl_data_chunks; - std::vector> lvl_stream_info; - // Each read correspond to one or more consecutive stream combined. - struct stream_read_info { - stream_read_info(uint64_t offset_, + // Store information to identify where to read a chunk of data from source.
+ // Each read corresponds to one or more consecutive streams combined. + struct data_read_info { + // TODO: remove constructor + data_read_info(uint64_t offset_, std::size_t length_, std::size_t dst_pos_, std::size_t source_idx_, @@ -143,23 +172,39 @@ struct file_intermediate_data { level(level_) { } - uint64_t offset; - std::size_t length; - std::size_t dst_pos; - std::size_t source_idx; - std::size_t stripe_idx; - std::size_t level; + uint64_t offset; // offset in data source + std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data + std::size_t length; // data length to read + std::size_t source_idx; // the data source id + std::size_t stripe_idx; // stream id TODO: processing or source stripe id? + std::size_t level; // nested level }; - std::vector stream_read_info; - std::vector stripe_stream_read_chunks; // chunk identify the reading streams (multiple - // streams can be read once) for each stripe - std::vector> - lvl_stripe_stream_chunks; // chunk identify all processing streams for each stripe, need to be - // level-based - - int64_t rows_to_skip; - size_type rows_to_read; - std::vector selected_stripes; + + // Identify what data to read from source. + std::vector data_read_info; + + // For each stripe, we perform a number of read for its streams. + // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. + std::vector stripe_data_read_chunks; + + + // Store info for each ORC stream at each nested level. + std::vector> lvl_stream_info; + + +// At each nested level, the streams for each stripe are stored consecutively in lvl_stream_info. +// This is used to identify the range of streams for each stripe from that vector. + std::vector> lvl_stripe_stream_chunks; + + +// TODO + std::vector>> null_count_prefix_sums; + + // For data processing, decompression, and decoding. + // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. + std::vector> lvl_data_chunks; + + bool global_preprocessed{false}; }; @@ -173,15 +218,16 @@ struct chunk_read_data { { } - std::size_t output_size_limit; // Maximum size (in bytes) of an output chunk, or 0 for no limit - std::size_t read_size_limit; // Maximum size (in bytes) of an output chunk, or 0 for no limit + std::size_t output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit + std::size_t read_size_limit; // approximate maximum size (in bytes) used for store + // intermediate data, or 0 for no limit - // Chunks of stripes that can be load such that total of their data size is within a limit. + // Chunks of stripes that can be load into memory such that their data size is within a size limit. std::vector load_stripe_chunks; std::size_t curr_load_stripe_chunk{0}; bool more_stripe_to_load() const { return curr_load_stripe_chunk < load_stripe_chunks.size(); } - // Chunks of stripes such that total of their decompression size is within a limit. + // Chunks of stripes such that their decompression size is within a size limit. 
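+  // Unlike `load_stripe_chunks` above, these chunks are formed only after the loaded stripes
+  // have been probed for decompression, so the size estimate reflects the decompressed
+  // footprint rather than the size on disk.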
std::vector decode_stripe_chunks; std::size_t curr_decode_stripe_chunk{0}; bool more_stripe_to_decode() const diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 3333af1ac97..a64c22fa332 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -140,6 +140,7 @@ std::size_t gather_stream_info_and_update_chunks( return dst_offset; } +// TODO: update /** * @brief Decompresses the stripe data, at stream granularity. * @@ -155,8 +156,7 @@ std::size_t gather_stream_info_and_update_chunks( * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( - std::unordered_map const& - compinfo_map, + stream_id_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, host_span stream_info, From 48c96a28bed15fc1d0e1336580800069037551ef Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Feb 2024 13:49:28 +0700 Subject: [PATCH 063/321] Fix bugs --- cpp/src/io/orc/reader_impl_chunking.cu | 22 +++++++++++----------- cpp/src/io/orc/reader_impl_chunking.hpp | 20 +++++++------------- cpp/src/io/orc/reader_impl_preprocess.cu | 22 +++++++++++----------- 3 files changed, 29 insertions(+), 35 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2463196dc31..350be1ac712 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -95,10 +95,10 @@ std::size_t gather_stream_info(std::size_t stripe_index, stream_info.emplace_back(stripeinfo->offset + src_offset, dst_offset, stream.length, - stripe_index, + stream_id_info{stripe_index, level, column_id, - stream.kind); + stream.kind}); dst_offset += stream.length; } src_offset += stream.length; @@ -542,17 +542,17 @@ void reader::impl::subpass_preprocess() for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, info.length)); stream_compinfo_map[stream_id_info{ - info.stripe_idx, info.level, info.orc_col_idx, info.kind}] = + info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}] = &compinfo[compinfo.size() - 1]; #ifdef PRINT_DEBUG printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, info.dst_pos, info.length); fflush(stdout); @@ -574,11 +574,11 @@ void reader::impl::subpass_preprocess() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; - stripe_decomp_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes += + stripe_decomp_sizes[stream_id.id.stripe_idx - stripe_chunk.start_idx].size_bytes += stream_compinfo->max_uncompressed_size; #ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.stripe_idx, + (int)stream_id.id.stripe_idx, (int)stream_id.level, (int)stream_id.orc_col_idx, (int)stream_id.kind, @@ -599,7 +599,7 @@ void reader::impl::subpass_preprocess() // Set decompression size equal to the input size. 
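      // (When the stripes are not compressed there is nothing to decompress, so a stripe's
      //  memory footprint is simply the sum of its stream lengths.)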
for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - stripe_decomp_sizes[info.stripe_idx - stripe_chunk.start_idx].size_bytes += info.length; + stripe_decomp_sizes[info.id.stripe_idx - stripe_chunk.start_idx].size_bytes += info.length; } } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1e5d0a3d988..d3d6c04d1fb 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -34,19 +34,19 @@ namespace cudf::io::orc::detail { * @brief Struct that store identification of an ORC streams */ struct stream_id_info { - uint32_t stripe_idx; // TODO: check if this is correct stripe processing index, not stripe index in source + uint32_t stripe_idx; // global stripe id throughout the data source // TODO: change type below std::size_t level; // level of the nested column uint32_t orc_col_idx; // orc column id StreamKind kind; // stream kind struct hash { - std::size_t operator()(stream_id_info const& index) const + std::size_t operator()(stream_id_info const& id) const { auto const hasher = std::hash{}; - return hasher(index.stripe_idx) ^ hasher(index.level) ^ - hasher(static_cast(index.orc_col_idx)) ^ - hasher(static_cast(index.kind)); + return hasher(id.stripe_idx) ^ hasher(id.level) ^ + hasher(static_cast(id.orc_col_idx)) ^ + hasher(static_cast(id.kind)); } }; struct equal_to { @@ -73,17 +73,11 @@ struct orc_stream_info { explicit orc_stream_info(uint64_t offset_, std::size_t dst_pos_, uint32_t length_, - uint32_t stripe_idx_, - std::size_t level_, - uint32_t orc_col_idx_, - StreamKind kind_) + stream_id_info const& id_) : offset(offset_), dst_pos(dst_pos_), length(length_), - stripe_idx(stripe_idx_), - level(level_), - orc_col_idx(orc_col_idx_), - kind(kind_) + id(id_) { #ifdef PRINT_DEBUG printf(" construct stripe id [%d, %d, %d, %d]\n", diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index a64c22fa332..ac6f4d4336b 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -178,27 +178,27 @@ rmm::device_buffer decompress_stripe_data( for (auto const& info : stream_info) { #ifdef PRINT_DEBUG printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, info.dst_pos, info.length); fflush(stdout); #endif compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, info.length)); // printf("line %d\n", __LINE__); // fflush(stdout); auto const& cached_comp_info = - compinfo_map.at(stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}); + compinfo_map.at(stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}); // printf("line %d\n", __LINE__); // fflush(stdout); // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.stripe_idx, info.level, info.orc_col_idx, info.kind}]; + // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}]; auto& stream_comp_info = compinfo[compinfo.size() - 1]; stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = 
cached_comp_info.num_uncompressed_blocks; @@ -228,10 +228,10 @@ rmm::device_buffer decompress_stripe_data( auto const& info = stream_info[i]; printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)info.stripe_idx, - (int)info.level, - (int)info.orc_col_idx, - (int)info.kind, + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, (size_t)compinfo[i].num_compressed_blocks, (size_t)compinfo[i].num_uncompressed_blocks, compinfo[i].max_uncompressed_size); From b64c7f2d5c4cd1ac4808d59df50ad485afaa21d4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Feb 2024 14:18:40 +0700 Subject: [PATCH 064/321] Rename variables --- cpp/src/io/orc/reader_impl.cu | 20 ++++++++++++++++++++ cpp/src/io/orc/reader_impl.hpp | 21 +++++++++++++++++++++ cpp/src/io/orc/reader_impl_chunking.cu | 16 ++++++++-------- cpp/src/io/orc/reader_impl_chunking.hpp | 6 +++--- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4ee25fdab70..8e9e4365a1e 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -37,6 +37,26 @@ reader::impl::impl(std::vector>&& sources, { } +reader::impl::impl(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _stream(stream), + _mr(mr), + _timestamp_type{options.get_timestamp_type()}, + _use_index{options.is_enabled_use_index()}, + _use_np_dtypes{options.is_enabled_use_np_dtypes()}, + _decimal128_columns{options.get_decimal128_columns()}, + _col_meta{std::make_unique()}, + _sources(std::move(sources)), + _metadata{_sources, stream}, + _selected_columns{_metadata.select_columns(options.get_columns())}, + _chunk_read_data{output_size_limit, data_read_limit} +{ +} + table_with_metadata reader::impl::read(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index d7653e3e180..5324f8dbc8a 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -53,6 +53,25 @@ class reader::impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + /** + * @brief Constructor from a dataset source with reader options. + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param data_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit + * @param sources Dataset sources + * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ + explicit impl(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Read an entire set or a subset of data and returns a set of columns * @@ -65,6 +84,8 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); + + private: /** * @brief Perform all the necessary data preprocessing before creating an output table. 
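The overload added above is what lets the ORC reader be constructed with the two chunking
limits. As a rough sketch of the call pattern this series appears to be building toward --
note that the `chunked_orc_reader` wrapper and its `has_next()`/`read_chunk()` interface are
assumptions modeled on cuDF's chunked Parquet reader, not API introduced by this patch:

#include <cudf/io/orc.hpp>
#include <cudf/table/table.hpp>

#include <memory>
#include <string>
#include <vector>

// Hypothetical driver loop; the reader type and its members are assumed, not
// defined by this series yet.
std::vector<std::unique_ptr<cudf::table>> read_orc_in_chunks(std::string const& path)
{
  auto const options =
    cudf::io::orc_reader_options::builder(cudf::io::source_info{path}).build();

  // First limit: cap (in bytes) on each returned table chunk.
  // Second limit: cap (in bytes) on intermediate load/decompress memory.
  cudf::io::chunked_orc_reader reader(
    512UL * 1024 * 1024, 2UL * 1024 * 1024 * 1024, options);

  std::vector<std::unique_ptr<cudf::table>> result;
  while (reader.has_next()) {
    result.emplace_back(reader.read_chunk().tbl);
  }
  return result;
}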
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 350be1ac712..e0f2db311d6 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -383,7 +383,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // Load all chunks if there is no read limit. - if (_chunk_read_data.read_size_limit == 0) { + if (_chunk_read_data.data_read_limit == 0) { _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; // TODO: DEBUG only // return; @@ -404,11 +404,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // DEBUG only - _chunk_read_data.read_size_limit = + _chunk_read_data.data_read_limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; _chunk_read_data.load_stripe_chunks = - find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); + find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.load_stripe_chunks; @@ -427,7 +427,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. -// verify_splits(splits, total_stripe_sizes, num_stripes, _chunk_read_data.read_size_limit); +// verify_splits(splits, total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); #endif } @@ -608,7 +608,7 @@ void reader::impl::subpass_preprocess() } // end loop level // Decode all chunks if there is no read limit. - if (_chunk_read_data.read_size_limit == 0) { + if (_chunk_read_data.data_read_limit == 0) { _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; // TODO: DEBUG only // return; @@ -625,11 +625,11 @@ void reader::impl::subpass_preprocess() stripe_decomp_sizes.device_to_host_sync(_stream); // DEBUG only - // _chunk_read_data.read_size_limit = + // _chunk_read_data.data_read_limit = // stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3; _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.read_size_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } @@ -656,7 +656,7 @@ void reader::impl::subpass_preprocess() // 4. sum(number of stripes in all chunks) == total_num_stripes. // TODO: enable only in debug. // verify_splits(splits, stripe_decompression_sizes, stripe_chunk.count, -// _file_itm_data.read_size_limit); +// _file_itm_data.data_read_limit); #endif // lvl_stripe_data.clear(); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index d3d6c04d1fb..2d07cae3214 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -207,13 +207,13 @@ struct file_intermediate_data { * @brief Struct to store all data necessary for chunked reading. 
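 *
 * The reader works against two budgets: `data_read_limit` bounds how much stripe data is
 * loaded and decompressed at one time, while `output_size_limit` bounds the size of each
 * table returned to the caller.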
*/ struct chunk_read_data { - explicit chunk_read_data(std::size_t output_size_limit_ = 0, std::size_t read_size_limit_ = 0) - : output_size_limit{output_size_limit_}, read_size_limit(read_size_limit_) + explicit chunk_read_data(std::size_t output_size_limit_ = 0, std::size_t data_read_limit_ = 0) + : output_size_limit{output_size_limit_}, data_read_limit(data_read_limit_) { } std::size_t output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit - std::size_t read_size_limit; // approximate maximum size (in bytes) used for store + std::size_t data_read_limit; // approximate maximum size (in bytes) used for store // intermediate data, or 0 for no limit // Chunks of stripes that can be load into memory such that their data size is within a size limit. From 6ede82e53864913fa4a6f9626d79a7e01e682a88 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Feb 2024 21:47:00 +0700 Subject: [PATCH 065/321] Add more docs --- cpp/src/io/orc/reader_impl.cu | 1 + cpp/src/io/orc/reader_impl.hpp | 51 ++++++++++++++++++++---- cpp/src/io/orc/reader_impl_chunking.cu | 3 +- cpp/src/io/orc/reader_impl_preprocess.cu | 2 +- 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 8e9e4365a1e..71996f026f9 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -135,6 +135,7 @@ table_with_metadata reader::impl::read_chunk_internal() return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)};
 }

+// TODO: move code here
 void reader::impl::decompress_and_decode() {}

 table_with_metadata reader::impl::make_output_chunk() { return table_with_metadata{}; }

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 5324f8dbc8a..0b7c00d621d 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -89,6 +89,9 @@ class reader::impl {
  private:
  /**
   * @brief Perform all the necessary data preprocessing before creating an output table.
+   *
+   * This is the proxy to call all other data preprocessing functions, which are prerequisites
+   * for generating an output table.
   *
   * @param skip_rows Number of rows to skip from the start
   * @param num_rows_opt Optional number of rows to read, or `std::nullopt` to read all rows
@@ -98,17 +101,55 @@ class reader::impl {
                     std::optional<size_type> const& num_rows_opt,
                     std::vector<std::vector<size_type>> const& stripes);

-  // Do once for the entire file.
+  /**
+   * @brief Perform a global preprocessing step that executes exactly once for the entire duration
+   * of the reader.
+   *
+   * In this step, the metadata of all stripes in the data source is parsed, and information about
+   * data streams for all selected columns in all stripes is generated. If the reader has a data
+   * read limit, the data sizes of all stripes are used to determine the chunks of consecutive
+   * stripes for reading each time using the `read_data()` step. This is to ensure that loading
+   * these stripes will not exceed a fixed portion of the data read limit.
+   */
  void global_preprocess(uint64_t skip_rows,
                         std::optional<size_type> const& num_rows_opt,
                         std::vector<std::vector<size_type>> const& stripes);

-  void pass_preprocess();
+  /**
+   * @brief Read stripes from the input source and store the data in the internal buffers.
+   *
+   * If there is a data read limit, only a chunk of stripes is read at a time such that
+   * their total data size does not exceed a fixed portion of the limit. Then, the data is
+   * probed to determine the uncompressed sizes for these loaded stripes, which are in turn
+   * used to determine a subset of stripes to decompress and decode in the next step
+   * `decompress_and_decode()`.
+   * This is to ensure that loading data together with decompression and decoding will not exceed
+   * the data read limit.
+   */
+  void read_data();

+  /**
+   * TODO: merge with read data.
+   */
  void subpass_preprocess();

+  /**
+   * @brief Decompress and decode the data in the internal buffers, and store the result into
+   * an internal table.
+   *
+   * If there is a data read limit, only a chunk of stripes is decompressed and decoded at a time.
+   * Then, the result is stored in an internal table, and sizes of its rows are computed
+   * to determine slices of rows to return as the output table in the final step
+   * `make_output_chunk()`.
+   */
  void decompress_and_decode();

+  /**
+   * @brief Create the output table from the internal buffers and return it along with metadata.
+   *
+   * This function is called internally and expects all preprocessing steps have already been done.
+   *
+   * @return The output table along with columns' metadata
+   */
  table_with_metadata make_output_chunk();

  /**
@@ -119,11 +160,7 @@ class reader::impl {
  table_metadata make_output_metadata();

  /**
-   * @brief Read a chunk of data from the input source and return an output table with metadata.
-   *
-   * This function is called internally and expects all preprocessing steps have already been done.
- * - * @return The output table along with columns' metadata + * TODO: move code to make_output_chunk */ table_with_metadata read_chunk_internal(); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index e0f2db311d6..ccb6d3dc4e8 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -432,7 +432,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // Load each chunk from `load_stripe_chunks`. -void reader::impl::pass_preprocess() +void reader::impl::read_data() { if (_file_itm_data.has_no_data()) { return; } @@ -494,6 +494,7 @@ void reader::impl::pass_preprocess() } } +// TODO: merge with read_data() void reader::impl::subpass_preprocess() { if (_file_itm_data.has_no_data()) { return; } diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index ac6f4d4336b..ed6219d584c 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -761,7 +761,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, // TODO: fix this, should be called once while (_chunk_read_data.more_stripe_to_load()) { - pass_preprocess(); + read_data(); } // Fix this, subpass should be call once From ceaf1ff57a4dc5295931b9036b6dbe3210df6822 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Feb 2024 22:06:33 +0700 Subject: [PATCH 066/321] Rename variable --- cpp/src/io/orc/reader_impl.cu | 6 +++--- cpp/src/io/orc/reader_impl.hpp | 2 +- cpp/src/io/parquet/reader_impl.cpp | 16 ++++++++-------- cpp/src/io/parquet/reader_impl.hpp | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 71996f026f9..48d110c61bd 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -67,7 +67,7 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, table_metadata reader::impl::make_output_metadata() { - if (_output_metadata) { return table_metadata{*_output_metadata}; } + if (_out_metadata) { return table_metadata{*_out_metadata}; } // Copy user data to the output metadata. table_metadata out_metadata; @@ -88,8 +88,8 @@ table_metadata reader::impl::make_output_metadata() out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), out_metadata.per_file_user_data[0].end()}; - // Save the output table metadata into `_output_metadata` for reuse next time. - _output_metadata = std::make_unique(out_metadata); + // Save the output table metadata into `_out_metadata` for reuse next time. 
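+  // (The table metadata is identical for every output chunk, so it is built once here and
+  //  only copied on subsequent calls.)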
+ _out_metadata = std::make_unique(out_metadata); return out_metadata; } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 0b7c00d621d..d131d907fa1 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -180,7 +180,7 @@ class reader::impl { column_hierarchy const _selected_columns; // Construct from `_metadata` thus declare after it file_intermediate_data _file_itm_data; chunk_read_data _chunk_read_data; - std::unique_ptr _output_metadata; + std::unique_ptr _out_metadata; std::vector> _out_buffers; }; diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 24d46d91dbb..bba7ad6b337 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -407,8 +407,8 @@ void reader::impl::populate_metadata(table_metadata& out_metadata) table_with_metadata reader::impl::read_chunk_internal( bool uses_custom_row_bounds, std::optional> filter) { - // If `_output_metadata` has been constructed, just copy it over. - auto out_metadata = _output_metadata ? table_metadata{*_output_metadata} : table_metadata{}; + // If `_out_metadata` has been constructed, just copy it over. + auto out_metadata = _out_metadata ? table_metadata{*_out_metadata} : table_metadata{}; out_metadata.schema_info.resize(_output_buffers.size()); // output cudf columns as determined by the top level schema @@ -439,8 +439,8 @@ table_with_metadata reader::impl::read_chunk_internal( metadata = std::make_optional(); metadata->set_convert_binary_to_strings(false); } - // Only construct `out_metadata` if `_output_metadata` has not been cached. - if (!_output_metadata) { + // Only construct `out_metadata` if `_out_metadata` has not been cached. + if (!_out_metadata) { column_name_info& col_name = out_metadata.schema_info[i]; out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream)); } else { @@ -459,7 +459,7 @@ table_with_metadata reader::impl::finalize_output( { // Create empty columns as needed (this can happen if we've ended up with no actual data to read) for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) { - if (!_output_metadata) { + if (!_out_metadata) { column_name_info& col_name = out_metadata.schema_info[i]; out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr)); } else { @@ -467,10 +467,10 @@ table_with_metadata reader::impl::finalize_output( } } - if (!_output_metadata) { + if (!_out_metadata) { populate_metadata(out_metadata); - // Finally, save the output table metadata into `_output_metadata` for reuse next time. - _output_metadata = std::make_unique(out_metadata); + // Finally, save the output table metadata into `_out_metadata` for reuse next time. 
+ _out_metadata = std::make_unique(out_metadata); } // advance output chunk/subpass/pass info diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 67c56c9c2d7..09a1069e6c7 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -362,7 +362,7 @@ class reader::impl { std::vector _output_column_schemas; // _output_buffers associated metadata - std::unique_ptr _output_metadata; + std::unique_ptr _out_metadata; bool _strings_to_categorical = false; std::optional> _reader_column_schema; From a3052ddcec286fd14ff28bf661772e63b55ee673 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 3 Feb 2024 08:02:09 +0700 Subject: [PATCH 067/321] Add comments and unify functions --- cpp/src/io/orc/reader_impl.cu | 12 +++++------- cpp/src/io/orc/reader_impl.hpp | 4 ---- cpp/src/io/orc/reader_impl_chunking.cu | 20 ++------------------ cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- cpp/src/io/orc/reader_impl_preprocess.cu | 5 +++++ 5 files changed, 13 insertions(+), 30 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 48d110c61bd..8a3f2e4fd4a 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -62,7 +62,7 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, std::vector> const& stripes) { prepare_data(skip_rows, num_rows_opt, stripes); - return read_chunk_internal(); + return make_output_chunk(); } table_metadata reader::impl::make_output_metadata() @@ -94,7 +94,10 @@ table_metadata reader::impl::make_output_metadata() return out_metadata; } -table_with_metadata reader::impl::read_chunk_internal() +// TODO: move code here +void reader::impl::decompress_and_decode() {} + +table_with_metadata reader::impl::make_output_chunk() { // There is no columns in the table. if (_selected_columns.num_levels() == 0) { return {std::make_unique
(), table_metadata{}}; }

@@ -135,11 +138,6 @@ table_with_metadata reader::impl::read_chunk_internal()
   return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)}; } -// TODO: move code here -void reader::impl::decompress_and_decode() {} - -table_with_metadata reader::impl::make_output_chunk() { return table_with_metadata{}; } - // Forward to implementation reader::reader(std::vector>&& sources, orc_reader_options const& options, diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index d131d907fa1..4b6ab4494c6 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -128,10 +128,6 @@ class reader::impl { */ void read_data(); - /** - * TODO: merge with read data. - */ - void subpass_preprocess(); /** * @brief Decompress and decode the data in the internal buffers, and store the result into diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ccb6d3dc4e8..408a01f6d41 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -492,26 +492,10 @@ void reader::impl::read_data() for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } -} - -// TODO: merge with read_data() -void reader::impl::subpass_preprocess() -{ - if (_file_itm_data.has_no_data()) { return; } - - // auto const rows_to_read = _file_itm_data.rows_to_read; - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + stream_id_map stream_compinfo_map; - // TODO: This is subpass - // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - std::unordered_map - stream_compinfo_map; - - // TODO: fix this, loop only current chunk - auto const stripe_chunk = - _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, _stream); std::fill(stripe_decomp_sizes.begin(), stripe_decomp_sizes.end(), cumulative_size{1, 0}); @@ -590,7 +574,7 @@ void reader::impl::subpass_preprocess() #endif } - // Must clear so we will not overwrite the old compression info stream_id. + // Must clear map since the next level will have similar keys. stream_compinfo_map.clear(); } else { diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2d07cae3214..85dea4194d4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -191,7 +191,7 @@ struct file_intermediate_data { std::vector> lvl_stripe_stream_chunks; -// TODO +// TODO rename std::vector>> null_count_prefix_sums; // For data processing, decompression, and decoding. 
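Both global_preprocess() and the merged read_data() above reduce to the same primitive: cut a
prefix-summed array of `cumulative_size` into `chunk`s whose total stays within a limit. A
simplified, host-only sketch of that splitting logic is below; it assumes the sizes have
already been inclusively prefix-summed (the real find_splits() works on device data), and it
preserves the invariants noted earlier: every chunk holds at least one stripe, and a chunk
only exceeds the limit when a single stripe does.

#include <cstddef>
#include <cstdint>
#include <vector>

struct cumulative_size {
  int64_t count{0};
  std::size_t size_bytes{0};
};

struct chunk {
  int64_t start_idx;
  int64_t count;
};

// Host-only sketch of find_splits(): `sizes` holds inclusive prefix sums of
// per-stripe sizes; cut [0, total) into chunks that stay within `limit`.
std::vector<chunk> find_splits_sketch(std::vector<cumulative_size> const& sizes,
                                      int64_t total,
                                      std::size_t limit)
{
  std::vector<chunk> splits;
  int64_t begin        = 0;
  std::size_t consumed = 0;  // prefix sum already assigned to earlier chunks
  while (begin < total) {
    int64_t end = begin;
    // Grow the chunk while the accumulated size fits; always take at least
    // one stripe so that progress is guaranteed even for oversized stripes.
    while (end < total && (end == begin || sizes[end].size_bytes - consumed <= limit)) {
      ++end;
    }
    splits.push_back(chunk{begin, end - begin});
    consumed = sizes[end - 1].size_bytes;
    begin    = end;
  }
  return splits;
}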
diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index ed6219d584c..eae77e4d71f 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -796,6 +796,11 @@ void reader::impl::prepare_data(uint64_t skip_rows, lvl_chunks.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); + +// +// +// +// TODO: move this to reader_impl.cu, decomp and decode step std::size_t num_stripes = selected_stripes.size(); // Iterates through levels of nested columns, child column will be one level down From 2f62b8c55ddb5d09b6ffa527fd5bf50f793b9c91 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 3 Feb 2024 09:16:04 +0700 Subject: [PATCH 068/321] Reorganize variables --- cpp/src/io/orc/reader_impl.cu | 22 +++++++++++----------- cpp/src/io/orc/reader_impl.hpp | 17 +++++++---------- cpp/src/io/orc/reader_impl_preprocess.cu | 10 +++++----- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 8a3f2e4fd4a..c55fe69d463 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -26,10 +26,10 @@ reader::impl::impl(std::vector>&& sources, rmm::mr::device_memory_resource* mr) : _stream(stream), _mr(mr), - _timestamp_type{options.get_timestamp_type()}, - _use_index{options.is_enabled_use_index()}, - _use_np_dtypes{options.is_enabled_use_np_dtypes()}, - _decimal128_columns{options.get_decimal128_columns()}, + _config{options.get_timestamp_type(), + options.is_enabled_use_index(), + options.is_enabled_use_np_dtypes(), + options.get_decimal128_columns()}, _col_meta{std::make_unique()}, _sources(std::move(sources)), _metadata{_sources, stream}, @@ -45,10 +45,10 @@ reader::impl::impl(std::size_t output_size_limit, rmm::mr::device_memory_resource* mr) : _stream(stream), _mr(mr), - _timestamp_type{options.get_timestamp_type()}, - _use_index{options.is_enabled_use_index()}, - _use_np_dtypes{options.is_enabled_use_np_dtypes()}, - _decimal128_columns{options.get_decimal128_columns()}, + _config.timestamp_type{options.get_timestamp_type()}, + _config.use_index{options.is_enabled_use_index()}, + _config.use_np_dtypes{options.is_enabled_use_np_dtypes()}, + _config.decimal128_columns{options.get_decimal128_columns()}, _col_meta{std::make_unique()}, _sources(std::move(sources)), _metadata{_sources, stream}, @@ -114,9 +114,9 @@ table_with_metadata reader::impl::make_output_chunk() out_metadata.schema_info.emplace_back(""); return create_empty_column(col_meta.id, _metadata, - _decimal128_columns, - _use_np_dtypes, - _timestamp_type, + _config.decimal128_columns, + _config.use_np_dtypes, + _config.timestamp_type, out_metadata.schema_info.back(), _stream); }); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 4b6ab4494c6..083701e62ff 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -155,22 +155,19 @@ class reader::impl { */ table_metadata make_output_metadata(); - /** - * TODO: move code to make_output_chunk - */ - table_with_metadata read_chunk_internal(); - rmm::cuda_stream_view const _stream; rmm::mr::device_memory_resource* const _mr; // Reader configs - data_type const _timestamp_type; // Override output timestamp resolution - bool const _use_index; // Enable or disable attempt to use row index for parsing - bool const _use_np_dtypes; // Enable or disable the conversion to numpy-compatible dtypes - std::vector const _decimal128_columns; 
// Control decimals conversion - std::unique_ptr const _col_meta; // Track of orc mapping and child details + struct { + data_type timestamp_type; // Override output timestamp resolution + bool use_index; // Enable or disable attempt to use row index for parsing + bool use_np_dtypes; // Enable or disable the conversion to numpy-compatible dtypes + std::vector decimal128_columns; // Control decimals conversion + } const _config; // Intermediate data for internal processing. + std::unique_ptr const _col_meta; // Track of orc mapping and child details std::vector> const _sources; // Unused but owns data for `_metadata` aggregate_orc_metadata _metadata; column_hierarchy const _selected_columns; // Construct from `_metadata` thus declare after it diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index eae77e4d71f..88a423ec506 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -815,9 +815,9 @@ void reader::impl::prepare_data(uint64_t skip_rows, std::vector column_types; for (auto& col : columns_level) { auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, - _use_np_dtypes, - _timestamp_type.id(), - to_cudf_decimal_type(_decimal128_columns, _metadata, col.id)); + _config.use_np_dtypes, + _config.timestamp_type.id(), + to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or col_type == type_id::DECIMAL128) { @@ -843,7 +843,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = - _use_index && + _config.use_index && // Do stripes have row group index _metadata.is_row_grp_idx_present() && // Only use if we don't have much work with complete columns & stripes @@ -944,7 +944,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, ? 
sizeof(size_type) : cudf::size_of(column_types[col_idx]); chunk.num_rowgroups = stripe_num_rowgroups; - if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _timestamp_type.id(); } + if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _config.timestamp_type.id(); } if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; From c99d33838e34e08a85b064239eea23ba3d43542f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 3 Feb 2024 09:19:28 +0700 Subject: [PATCH 069/321] Rewrite constructor --- cpp/src/io/orc/reader_impl.cu | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index c55fe69d463..bc0d3ea1c35 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -43,16 +43,11 @@ reader::impl::impl(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _stream(stream), - _mr(mr), - _config.timestamp_type{options.get_timestamp_type()}, - _config.use_index{options.is_enabled_use_index()}, - _config.use_np_dtypes{options.is_enabled_use_np_dtypes()}, - _config.decimal128_columns{options.get_decimal128_columns()}, - _col_meta{std::make_unique()}, - _sources(std::move(sources)), - _metadata{_sources, stream}, - _selected_columns{_metadata.select_columns(options.get_columns())}, + : + reader::impl::impl(std::move(sources), + options, + stream, + mr ), _chunk_read_data{output_size_limit, data_read_limit} { } From 504208bed1d50610e96dc4e8cc6eead656c44841 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 3 Feb 2024 10:15:05 +0700 Subject: [PATCH 070/321] Reorganize code --- cpp/src/io/orc/reader_impl.cu | 83 +++++++++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 1 + cpp/src/io/orc/reader_impl_preprocess.cu | 1 + 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index bc0d3ea1c35..b5edaafc49b 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -60,35 +60,6 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, return make_output_chunk(); } -table_metadata reader::impl::make_output_metadata() -{ - if (_out_metadata) { return table_metadata{*_out_metadata}; } - - // Copy user data to the output metadata. - table_metadata out_metadata; - out_metadata.per_file_user_data.reserve(_metadata.per_file_metadata.size()); - std::transform(_metadata.per_file_metadata.cbegin(), - _metadata.per_file_metadata.cend(), - std::back_inserter(out_metadata.per_file_user_data), - [](auto const& meta) { - std::unordered_map kv_map; - std::transform(meta.ff.metadata.cbegin(), - meta.ff.metadata.cend(), - std::inserter(kv_map, kv_map.end()), - [](auto const& kv) { - return std::pair{kv.name, kv.value}; - }); - return kv_map; - }); - out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), - out_metadata.per_file_user_data[0].end()}; - - // Save the output table metadata into `_out_metadata` for reuse next time. 
- _out_metadata = std::make_unique(out_metadata); - - return out_metadata; -} - // TODO: move code here void reader::impl::decompress_and_decode() {} @@ -101,11 +72,11 @@ table_with_metadata reader::impl::make_output_chunk() auto out_metadata = make_output_metadata(); // If no rows or stripes to read, return empty columns - if (_file_itm_data.has_no_data()) { + if (_file_itm_data.has_no_data() || !_chunk_read_data.has_next()) { std::transform(_selected_columns.levels[0].begin(), _selected_columns.levels[0].end(), std::back_inserter(out_columns), - [&](auto const col_meta) { + [&](auto const & col_meta) { out_metadata.schema_info.emplace_back(""); return create_empty_column(col_meta.id, _metadata, @@ -118,6 +89,7 @@ table_with_metadata reader::impl::make_output_chunk() return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } +// TODO: move this into decompress_and_decode // Create columns from buffer with respective schema information. std::transform( _selected_columns.levels[0].begin(), @@ -130,9 +102,56 @@ table_with_metadata reader::impl::make_output_chunk() return make_column(col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)};
+// TODO: remove this
+  // auto decoded_table = std::make_unique<table>(std::move(out_columns));
+
+  auto out_table = [&] {
+    if (_chunk_read_data.output_table_chunks.size() == 1) {
+      return std::move(_chunk_read_data.decoded_table);
+    }
+
+    auto const out_chunk =
+      _chunk_read_data.output_table_chunks[_chunk_read_data.curr_output_table_chunk++];
+    auto const out_tview =
+      cudf::slice(_chunk_read_data.decoded_table->view(),
+                  {static_cast<size_type>(out_chunk.start_idx),
+                   static_cast<size_type>(out_chunk.start_idx + out_chunk.count)},
+                  _stream)[0];
+    return std::make_unique<table>
(out_tview); + }(); + + + return {std::move(out_table), std::move(out_metadata)}; +} + + +table_metadata reader::impl::make_output_metadata() +{ + if (_out_metadata) { return table_metadata{*_out_metadata}; } + + // Copy user data to the output metadata. + table_metadata out_metadata; + out_metadata.per_file_user_data.reserve(_metadata.per_file_metadata.size()); + std::transform(_metadata.per_file_metadata.cbegin(), + _metadata.per_file_metadata.cend(), + std::back_inserter(out_metadata.per_file_user_data), + [](auto const& meta) { + std::unordered_map kv_map; + std::transform(meta.ff.metadata.cbegin(), + meta.ff.metadata.cend(), + std::inserter(kv_map, kv_map.end()), + [](auto const& kv) { + return std::pair{kv.name, kv.value}; + }); + return kv_map; + }); + out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), + out_metadata.per_file_user_data[0].end()}; + + // Save the output table metadata into `_out_metadata` for reuse next time. + _out_metadata = std::make_unique(out_metadata); + + return out_metadata; } + // Forward to implementation reader::reader(std::vector>&& sources, orc_reader_options const& options, diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 85dea4194d4..279503c175b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -232,6 +232,7 @@ struct chunk_read_data { // Chunk of rows in the internal decoded table to output for each `read_chunk()`. std::vector output_table_chunks; std::size_t curr_output_table_chunk{0}; + std::unique_ptr decoded_table; bool more_table_chunk_to_output() const { return curr_output_table_chunk < output_table_chunks.size(); diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 88a423ec506..2ec78ac84f8 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -52,6 +52,7 @@ namespace cudf::io::orc::detail { namespace { +// TODO: merge this with gather stream info /** * @brief Function that populates column descriptors stream/chunk */ From df95f64eb6fbc77a65da7e18429d4aea8a1641ad Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 3 Feb 2024 21:33:09 +0700 Subject: [PATCH 071/321] Rewrite `gather_stream_info` --- cpp/src/io/orc/reader_impl_chunking.cu | 64 ++------------ cpp/src/io/orc/reader_impl_helpers.cpp | 104 +++++++++++++++++++++++ cpp/src/io/orc/reader_impl_helpers.hpp | 18 ++++ cpp/src/io/orc/reader_impl_preprocess.cu | 104 ++--------------------- 4 files changed, 138 insertions(+), 152 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 408a01f6d41..2cd40f5db0d 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -52,61 +52,6 @@ namespace cudf::io::orc::detail { namespace { -/** - * @brief Function that populates column descriptors stream/chunk - */ -std::size_t gather_stream_info(std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - host_span types, - bool apply_struct_map, - std::vector& stream_info) -{ - uint64_t src_offset = 0; - uint64_t dst_offset = 0; - - for (auto const& stream : stripefooter->streams) { - if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { - dst_offset += stream.length; - continue; - } - - auto const column_id = *stream.column_id; - auto col = orc2gdf[column_id]; - - if (col == -1 and 
apply_struct_map) { - // A struct-type column has no data itself, but rather child columns - // for each of its fields. There is only a PRESENT stream, which - // needs to be included for the reader. - auto const schema_type = types[column_id]; - if (not schema_type.subtypes.empty()) { - if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - for (auto const& idx : schema_type.subtypes) { - auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; - if (child_idx >= 0) { col = child_idx; } - } - } - } - } - - if (col != -1) { - stream_info.emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_id_info{stripe_index, - level, - column_id, - stream.kind}); - dst_offset += stream.length; - } - src_offset += stream.length; - } - - return dst_offset; -} - struct cumulative_size { int64_t count{0}; std::size_t size_bytes{0}; @@ -341,14 +286,19 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto& stripe_sizes = lvl_stripe_sizes[level]; auto stream_count = stream_info.size(); - auto const stripe_size = gather_stream_info(stripe_idx, + auto const stripe_size = gather_stream_info_and_column_desc(stripe_idx, level, stripe_info, stripe_footer, col_meta.orc_col_map[level], _metadata.get_types(), + false, // use_index, level == 0, - stream_info); + nullptr, // num_dictionary_entries + nullptr, // stream_idx + &stream_info, + std::nullopt // chunks + ); auto const is_stripe_data_empty = stripe_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, diff --git a/cpp/src/io/orc/reader_impl_helpers.cpp b/cpp/src/io/orc/reader_impl_helpers.cpp index ea4e5dcfaab..812e3474bba 100644 --- a/cpp/src/io/orc/reader_impl_helpers.cpp +++ b/cpp/src/io/orc/reader_impl_helpers.cpp @@ -18,6 +18,110 @@ namespace cudf::io::orc::detail { + +std::size_t gather_stream_info_and_column_desc( + std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool use_index, + bool apply_struct_map, + std::size_t* num_dictionary_entries, + std::size_t* stream_idx, + std::optional*> const& stream_info, + std::optional*> const& chunks) +{ + CUDF_EXPECTS(stream_info.has_value() ^ chunks.has_value(), + "Either stream_info or chunks must be provided, but not both."); + + uint64_t src_offset = 0; + uint64_t dst_offset = 0; + + auto const get_stream_index_type = [](orc::StreamKind kind) { + switch (kind) { + case orc::DATA: return gpu::CI_DATA; + case orc::LENGTH: + case orc::SECONDARY: return gpu::CI_DATA2; + case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; + case orc::PRESENT: return gpu::CI_PRESENT; + case orc::ROW_INDEX: return gpu::CI_INDEX; + default: + // Skip this stream as it's not strictly required + return gpu::CI_NUM_STREAMS; + } + }; + + for (auto const& stream : stripefooter->streams) { + if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { + // TODO: fix dst to src + src_offset += stream.length; + continue; + } + + auto const column_id = *stream.column_id; + auto col = orc2gdf[column_id]; + + if (col == -1 and apply_struct_map) { + // A struct-type column has no data itself, but rather child columns + // for each of its fields. There is only a PRESENT stream, which + // needs to be included for the reader. + auto const schema_type = types[column_id]; + if (! 
schema_type.subtypes.empty() && schema_type.kind == orc::STRUCT && + stream.kind == orc::PRESENT) { + + for (auto const& idx : schema_type.subtypes) { + auto const child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; + if (child_idx >= 0) { + col = child_idx; + if(chunks.has_value()) { + auto& chunk = (*chunks.value())[stripe_index][col]; + chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; + chunk.strm_len[gpu::CI_PRESENT] = stream.length; + } + } + } + } + } + if (col != -1) { + if (chunks.has_value()) { + if (src_offset >= stripeinfo->indexLength || use_index) { + auto const index_type = get_stream_index_type(stream.kind); + if (index_type < gpu::CI_NUM_STREAMS) { + auto& chunk = (*chunks.value())[stripe_index][col]; + chunk.strm_id[index_type] = *stream_idx; + chunk.strm_len[index_type] = stream.length; + // NOTE: skip_count field is temporarily used to track the presence of index streams + chunk.skip_count |= 1 << index_type; + + if (index_type == gpu::CI_DICTIONARY) { + chunk.dictionary_start = *num_dictionary_entries; + chunk.dict_len = stripefooter->columns[column_id].dictionarySize; + *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; + } + } + } + (*stream_idx)++; + } else { // not chunks.has_value() + stream_info.value().emplace_back(stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_id_info{stripe_index, + level, + column_id, + stream.kind}); + } + + + dst_offset += stream.length; + } + src_offset += stream.length; + } + + return dst_offset; +} + std::unique_ptr create_empty_column(size_type orc_col_id, aggregate_orc_metadata const& metadata, host_span decimal128_columns, diff --git a/cpp/src/io/orc/reader_impl_helpers.hpp b/cpp/src/io/orc/reader_impl_helpers.hpp index f0d91c75fc3..811cd05cdce 100644 --- a/cpp/src/io/orc/reader_impl_helpers.hpp +++ b/cpp/src/io/orc/reader_impl_helpers.hpp @@ -127,6 +127,24 @@ inline std::string get_map_child_col_name(std::size_t const idx) return (idx == 0) ? "key" : "value"; } + +/** + * @brief Function that populates descriptors for either individual streams or chunks of column data, but not both. + */ +std::size_t gather_stream_info_and_column_desc( + std::size_t stripe_index, + std::size_t level, + orc::StripeInformation const* stripeinfo, + orc::StripeFooter const* stripefooter, + host_span orc2gdf, + host_span types, + bool use_index, + bool apply_struct_map, + std::size_t* num_dictionary_entries, + std::size_t* stream_idx, + std::optional*> const& stream_info, + std::optional*> const& chunks); + /** * @brief Create empty columns and respective schema information from the buffer. 
*/ diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 2ec78ac84f8..90404c7b9ca 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -52,95 +52,6 @@ namespace cudf::io::orc::detail { namespace { -// TODO: merge this with gather stream info -/** - * @brief Function that populates column descriptors stream/chunk - */ -std::size_t gather_stream_info_and_update_chunks( - std::size_t stripe_index, - std::size_t level, - orc::StripeInformation const* stripeinfo, - orc::StripeFooter const* stripefooter, - host_span orc2gdf, - host_span types, - bool use_index, - bool apply_struct_map, - std::size_t* num_dictionary_entries, - std::size_t* stream_idx, - cudf::detail::hostdevice_2dvector& chunks) -{ - uint64_t src_offset = 0; - uint64_t dst_offset = 0; - - auto const get_stream_index_type = [](orc::StreamKind kind) { - switch (kind) { - case orc::DATA: return gpu::CI_DATA; - case orc::LENGTH: - case orc::SECONDARY: return gpu::CI_DATA2; - case orc::DICTIONARY_DATA: return gpu::CI_DICTIONARY; - case orc::PRESENT: return gpu::CI_PRESENT; - case orc::ROW_INDEX: return gpu::CI_INDEX; - default: - // Skip this stream as it's not strictly required - return gpu::CI_NUM_STREAMS; - } - }; - - for (auto const& stream : stripefooter->streams) { - if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { - dst_offset += stream.length; - continue; - } - - auto const column_id = *stream.column_id; - auto col = orc2gdf[column_id]; - - if (col == -1 and apply_struct_map) { - // A struct-type column has no data itself, but rather child columns - // for each of its fields. There is only a PRESENT stream, which - // needs to be included for the reader. - auto const schema_type = types[column_id]; - if (not schema_type.subtypes.empty()) { - if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - for (auto const& idx : schema_type.subtypes) { - auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; - if (child_idx >= 0) { - col = child_idx; - auto& chunk = chunks[stripe_index][col]; - chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; - chunk.strm_len[gpu::CI_PRESENT] = stream.length; - } - } - } - } - } - if (col != -1) { - if (src_offset >= stripeinfo->indexLength || use_index) { - auto& chunk = chunks[stripe_index][col]; - auto const index_type = get_stream_index_type(stream.kind); - if (index_type < gpu::CI_NUM_STREAMS) { - chunk.strm_id[index_type] = *stream_idx; - chunk.strm_len[index_type] = stream.length; - // NOTE: skip_count field is temporarily used to track the presence of index streams - chunk.skip_count |= 1 << index_type; - - if (index_type == gpu::CI_DICTIONARY) { - chunk.dictionary_start = *num_dictionary_entries; - chunk.dict_len = stripefooter->columns[column_id].dictionarySize; - *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize; - } - } - } - - (*stream_idx)++; - dst_offset += stream.length; - } - src_offset += stream.length; - } - - return dst_offset; -} - // TODO: update /** * @brief Decompresses the stripe data, at stream granularity. @@ -353,6 +264,8 @@ rmm::device_buffer decompress_stripe_data( default: CUDF_FAIL("Unexpected decompression dispatch"); break; } + // TODO: proclam return type + // Check if any block has been failed to decompress. // Not using `thrust::any` or `thrust::count_if` to defer stream sync. 
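  // (A result flag written on the device inside the loop can be checked later, whereas
  //  `thrust::any` or `thrust::count_if` would synchronize immediately to return a host value.)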
thrust::for_each( @@ -627,6 +540,8 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& stream.synchronize(); } + +// TODO: this is called for each chunk of stripes. /** * @brief Aggregate child metadata from parent column chunks. */ @@ -809,6 +724,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& columns_level = _selected_columns.levels[level]; + + // TODO: do it in global step // Association between each ORC column and its cudf::column std::vector nested_cols; @@ -883,7 +800,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - auto const total_data_size = gather_stream_info_and_update_chunks(stripe_idx, + auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx, level, stripe_info, stripe_footer, @@ -893,7 +810,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, level == 0, &num_dict_entries, &stream_idx, - chunks); + std::nullopt, // stream_info + &chunks); auto const is_stripe_data_empty = total_data_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, @@ -958,10 +876,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, stripe_idx++; } - // for (auto& task : read_tasks) { - // CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); - // } - if (stripe_data.empty()) { continue; } // Process dataset chunk pages into output columns From e6ebcc0cba17383b46acb1c76b9307fd7210beb3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 9 Feb 2024 14:44:32 +0700 Subject: [PATCH 072/321] Remove `_preprocess.cu` file --- cpp/CMakeLists.txt | 1 - cpp/src/io/orc/reader_impl.cu | 959 ++++++++++++++++++++++ cpp/src/io/orc/reader_impl_preprocess.cu | 980 ----------------------- 3 files changed, 959 insertions(+), 981 deletions(-) delete mode 100644 cpp/src/io/orc/reader_impl_preprocess.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 49c19596d23..7719d702eec 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -389,7 +389,6 @@ add_library( src/io/orc/reader_impl.cu src/io/orc/reader_impl_chunking.cu src/io/orc/reader_impl_helpers.cpp - src/io/orc/reader_impl_preprocess.cu src/io/orc/stats_enc.cu src/io/orc/stripe_data.cu src/io/orc/stripe_enc.cu diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index b5edaafc49b..c8f8635c9f2 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -14,12 +14,971 @@ * limitations under the License. */ + +// #define PRINT_DEBUG + #include "reader_impl.hpp" #include "reader_impl_chunking.hpp" #include "reader_impl_helpers.hpp" +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + namespace cudf::io::orc::detail { +namespace { + +// TODO: update +/** + * @brief Decompresses the stripe data, at stream granularity. 
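+ *
+ * The number of compressed/uncompressed blocks and the decompressed sizes are taken from
+ * `compinfo_map`, which was filled in when the streams were first probed, so the streams do
+ * not need to be parsed a second time here.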
+ * + * @param decompressor Block decompressor + * @param stripe_data List of source stripe column data + * @param stream_info List of stream to column mappings + * @param chunks Vector of list of column chunk descriptors + * @param row_groups Vector of list of row index descriptors + * @param num_stripes Number of stripes making up column chunks + * @param row_index_stride Distance between each row index + * @param use_base_stride Whether to use base stride obtained from meta or use the computed value + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Device buffer to decompressed page data + */ +rmm::device_buffer decompress_stripe_data( + stream_id_map const& compinfo_map, + OrcDecompressor const& decompressor, + host_span stripe_data, + host_span stream_info, + cudf::detail::hostdevice_2dvector& chunks, + cudf::detail::hostdevice_2dvector& row_groups, + std::size_t num_stripes, + std::size_t row_index_stride, + bool use_base_stride, + rmm::cuda_stream_view stream) +{ + // Count the exact number of compressed blocks + std::size_t num_compressed_blocks = 0; + std::size_t num_uncompressed_blocks = 0; + std::size_t total_decomp_size = 0; + + cudf::detail::hostdevice_vector compinfo( + 0, stream_info.size(), stream); + + for (auto const& info : stream_info) { +#ifdef PRINT_DEBUG + printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, + info.dst_pos, + info.length); + fflush(stdout); +#endif + + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, + info.length)); + + // printf("line %d\n", __LINE__); + // fflush(stdout); + auto const& cached_comp_info = + compinfo_map.at(stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}); + // printf("line %d\n", __LINE__); + // fflush(stdout); + // auto const& cached_comp_info = + // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}]; + auto& stream_comp_info = compinfo[compinfo.size() - 1]; + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; + stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; + stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + + num_compressed_blocks += cached_comp_info.num_compressed_blocks; + num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; + total_decomp_size += cached_comp_info.total_decomp_size; + } + + CUDF_EXPECTS( + not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), + "Inconsistent info on compression blocks"); + +#ifdef XXX + std::size_t old_num_compressed_blocks = num_compressed_blocks; + std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks; + std::size_t old_total_decomp_size = total_decomp_size; + + num_compressed_blocks = 0; + num_uncompressed_blocks = 0; + total_decomp_size = 0; + for (std::size_t i = 0; i < compinfo.size(); ++i) { + num_compressed_blocks += compinfo[i].num_compressed_blocks; + num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; + total_decomp_size += compinfo[i].max_uncompressed_size; + + auto const& info = stream_info[i]; + printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, + (size_t)compinfo[i].num_compressed_blocks, + 
(size_t)compinfo[i].num_uncompressed_blocks, + compinfo[i].max_uncompressed_size); + fflush(stdout); + } + + if (old_num_compressed_blocks != num_compressed_blocks || + old_num_uncompressed_blocks != num_uncompressed_blocks || + old_total_decomp_size != total_decomp_size) { + printf("invalid: %d - %d, %d - %d, %d - %d\n", + (int)old_num_compressed_blocks, + (int)num_compressed_blocks, + (int)old_num_uncompressed_blocks, + (int)num_uncompressed_blocks, + (int)old_total_decomp_size, + (int)total_decomp_size + + ); + } +#endif + + // Buffer needs to be padded. + // Required by `gpuDecodeOrcColumnData`. + rmm::device_buffer decomp_data( + cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); + if (decomp_data.is_empty()) { return decomp_data; } + + rmm::device_uvector> inflate_in( + num_compressed_blocks + num_uncompressed_blocks, stream); + rmm::device_uvector> inflate_out( + num_compressed_blocks + num_uncompressed_blocks, stream); + rmm::device_uvector inflate_res(num_compressed_blocks, stream); + thrust::fill(rmm::exec_policy(stream), + inflate_res.begin(), + inflate_res.end(), + compression_result{0, compression_status::FAILURE}); + + // Parse again to populate the decompression input/output buffers + std::size_t decomp_offset = 0; + uint32_t max_uncomp_block_size = 0; + uint32_t start_pos = 0; + auto start_pos_uncomp = (uint32_t)num_compressed_blocks; + for (std::size_t i = 0; i < compinfo.size(); ++i) { + auto dst_base = static_cast(decomp_data.data()); + compinfo[i].uncompressed_data = dst_base + decomp_offset; + compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; + compinfo[i].dec_out_ctl = inflate_out.data() + start_pos; + compinfo[i].dec_res = {inflate_res.data() + start_pos, compinfo[i].num_compressed_blocks}; + compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; + compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; + + // stream_info[i].dst_pos = decomp_offset; + decomp_offset += compinfo[i].max_uncompressed_size; + start_pos += compinfo[i].num_compressed_blocks; + start_pos_uncomp += compinfo[i].num_uncompressed_blocks; + max_uncomp_block_size = + std::max(max_uncomp_block_size, compinfo[i].max_uncompressed_block_size); + } + compinfo.host_to_device_async(stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + stream); + + // Value for checking whether we decompress successfully. + // It doesn't need to be atomic as there is no race condition: we only write `true` if needed. 
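+  // A minimal sketch of the benign-race pattern used here (an illustrative
+  // kernel, not the one launched below): every thread that observes a failure
+  // stores the same value `true`, so the result is well defined without atomics:
+  //
+  //   __global__ void flag_any_failure(compression_result const* results,
+  //                                    std::size_t n, bool* flag)
+  //   {
+  //     auto const i = blockIdx.x * blockDim.x + threadIdx.x;
+  //     if (i < n && results[i].status != compression_status::SUCCESS) { *flag = true; }
+  //   }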
+  cudf::detail::hostdevice_vector<bool> any_block_failure(1, stream);
+  any_block_failure[0] = false;
+  any_block_failure.host_to_device_async(stream);
+
+  // Dispatch batches of blocks to decompress
+  if (num_compressed_blocks > 0) {
+    device_span<device_span<uint8_t const>> inflate_in_view{inflate_in.data(),
+                                                            num_compressed_blocks};
+    device_span<device_span<uint8_t>> inflate_out_view{inflate_out.data(), num_compressed_blocks};
+    switch (decompressor.compression()) {
+      case compression_type::ZLIB:
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) {
+          gpuinflate(
+            inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream);
+        } else {
+          nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE,
+                                     inflate_in_view,
+                                     inflate_out_view,
+                                     inflate_res,
+                                     max_uncomp_block_size,
+                                     total_decomp_size,
+                                     stream);
+        }
+        break;
+      case compression_type::SNAPPY:
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::SNAPPY)) {
+          gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream);
+        } else {
+          nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
+                                     inflate_in_view,
+                                     inflate_out_view,
+                                     inflate_res,
+                                     max_uncomp_block_size,
+                                     total_decomp_size,
+                                     stream);
+        }
+        break;
+      case compression_type::ZSTD:
+        if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD);
+            reason) {
+          CUDF_FAIL("Decompression error: " + reason.value());
+        }
+        nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
+                                   inflate_in_view,
+                                   inflate_out_view,
+                                   inflate_res,
+                                   max_uncomp_block_size,
+                                   total_decomp_size,
+                                   stream);
+        break;
+      default: CUDF_FAIL("Unexpected decompression dispatch"); break;
+    }
+
+    // TODO: proclaim the return type.
+
+    // Check if any block has failed to decompress.
+    // Not using `thrust::any` or `thrust::count_if` to defer the stream sync.
+    thrust::for_each(
+      rmm::exec_policy(stream),
+      thrust::make_counting_iterator(std::size_t{0}),
+      thrust::make_counting_iterator(inflate_res.size()),
+      [results           = inflate_res.begin(),
+       any_block_failure = any_block_failure.device_ptr()] __device__(auto const idx) {
+        if (results[idx].status != compression_status::SUCCESS) { *any_block_failure = true; }
+      });
+  }
+
+  if (num_uncompressed_blocks > 0) {
+    device_span<device_span<uint8_t const>> copy_in_view{inflate_in.data() + num_compressed_blocks,
+                                                         num_uncompressed_blocks};
+    device_span<device_span<uint8_t>> copy_out_view{inflate_out.data() + num_compressed_blocks,
+                                                    num_uncompressed_blocks};
+    gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream);
+  }
+
+  // Copied without a stream sync, so we must wait for the stream sync below before host access.
+  any_block_failure.device_to_host_async(stream);
+
+  gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream);
+  compinfo.device_to_host_sync(stream);  // This also syncs the stream for `any_block_failure`.
+
+  // We can check on the host after the stream synchronization above.
+  CUDF_EXPECTS(not any_block_failure[0], "Error during decompression");
+
+  auto const num_columns = chunks.size().second;
+
+  // Update the stream information with the updated uncompressed info
+  // TBD: We could update the value from the information we already
+  // have in stream_info[], but using the gpu results also updates
+  // max_uncompressed_size to the actual uncompressed size, or zero if
+  // decompression failed.
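+  // A sketch of that alternative, for reference only (assumes every block
+  // decompressed successfully, so the sizes cached in compinfo_map are still
+  // authoritative):
+  //
+  //   for (std::size_t i = 0; i < compinfo.size(); ++i) {
+  //     auto const& id = stream_info[i].id;
+  //     compinfo[i].max_uncompressed_size =
+  //       compinfo_map.at(stream_id_info{id.stripe_idx, id.level, id.orc_cold_idx, id.kind})
+  //         .total_decomp_size;
+  //   }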
+ for (std::size_t i = 0; i < num_stripes; ++i) { + for (std::size_t j = 0; j < num_columns; ++j) { + auto& chunk = chunks[i][j]; + for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { + if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { + chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; + chunk.strm_len[k] = compinfo[chunk.strm_id[k]].max_uncompressed_size; + } + } + } + } + + if (row_groups.size().first) { + chunks.host_to_device_async(stream); + row_groups.host_to_device_async(stream); + gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), + compinfo.device_ptr(), + chunks.base_device_ptr(), + num_columns, + num_stripes, + row_groups.size().first, + row_index_stride, + use_base_stride, + stream); + } + + return decomp_data; +} + +/** + * @brief Updates null mask of columns whose parent is a struct column. + * + * If struct column has null element, that row would be skipped while writing child column in ORC, + * so we need to insert the missing null elements in child column. There is another behavior from + * pyspark, where if the child column doesn't have any null elements, it will not have present + * stream, so in that case parent null mask need to be copied to child column. + * + * @param chunks Vector of list of column chunk descriptors + * @param out_buffers Output columns' device buffers + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource to use for device memory allocation + */ +void update_null_mask(cudf::detail::hostdevice_2dvector& chunks, + host_span out_buffers, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_stripes = chunks.size().first; + auto const num_columns = chunks.size().second; + bool is_mask_updated = false; + + for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { + if (chunks[0][col_idx].parent_validity_info.valid_map_base != nullptr) { + if (not is_mask_updated) { + chunks.device_to_host_sync(stream); + is_mask_updated = true; + } + + auto parent_valid_map_base = chunks[0][col_idx].parent_validity_info.valid_map_base; + auto child_valid_map_base = out_buffers[col_idx].null_mask(); + auto child_mask_len = + chunks[0][col_idx].column_num_rows - chunks[0][col_idx].parent_validity_info.null_count; + auto parent_mask_len = chunks[0][col_idx].column_num_rows; + + if (child_valid_map_base != nullptr) { + rmm::device_uvector dst_idx(child_mask_len, stream); + // Copy indexes at which the parent has valid value. 
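+        // A small worked example of the merge below (values illustrative only):
+        //   parent mask : 1 0 1 1  -> valid parent rows dst_idx = {0, 2, 3}
+        //   child mask  : 1 0 1    (one bit per valid parent row)
+        //   merged mask : 1 0 0 1  (child bit j is scattered to dst_idx[j])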
+ thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + parent_mask_len, + dst_idx.begin(), + [parent_valid_map_base] __device__(auto idx) { + return bit_is_set(parent_valid_map_base, idx); + }); + + auto merged_null_mask = cudf::detail::create_null_mask( + parent_mask_len, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); + auto merged_mask = static_cast(merged_null_mask.data()); + uint32_t* dst_idx_ptr = dst_idx.data(); + // Copy child valid bits from child column to valid indexes, this will merge both child + // and parent null masks + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + dst_idx.size(), + [child_valid_map_base, dst_idx_ptr, merged_mask] __device__(auto idx) { + if (bit_is_set(child_valid_map_base, idx)) { + cudf::set_bit(merged_mask, dst_idx_ptr[idx]); + }; + }); + + out_buffers[col_idx].set_null_mask(std::move(merged_null_mask)); + + } else { + // Since child column doesn't have a mask, copy parent null mask + auto mask_size = bitmask_allocation_size_bytes(parent_mask_len); + out_buffers[col_idx].set_null_mask( + rmm::device_buffer(static_cast(parent_valid_map_base), mask_size, stream, mr)); + } + } + } + + if (is_mask_updated) { + // Update chunks with pointers to column data which might have been changed. + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { + auto& chunk = chunks[stripe_idx][col_idx]; + chunk.valid_map_base = out_buffers[col_idx].null_mask(); + } + } + chunks.host_to_device_sync(stream); + } +} + +/** + * @brief Converts the stripe column data and outputs to columns. + * + * @param num_dicts Number of dictionary entries required + * @param skip_rows Number of rows to offset from start + * @param row_index_stride Distance between each row index + * @param level Current nesting level being processed + * @param tz_table Local time to UTC conversion table + * @param chunks Vector of list of column chunk descriptors + * @param row_groups Vector of list of row index descriptors + * @param out_buffers Output columns' device buffers + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ +void decode_stream_data(std::size_t num_dicts, + std::size_t skip_rows, + std::size_t row_index_stride, + std::size_t level, + table_view const& tz_table, + cudf::detail::hostdevice_2dvector& chunks, + cudf::detail::device_2dspan row_groups, + std::vector& out_buffers, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_stripes = chunks.size().first; + auto const num_columns = chunks.size().second; + thrust::counting_iterator col_idx_it(0); + thrust::counting_iterator stripe_idx_it(0); + + // Update chunks with pointers to column data + std::for_each(stripe_idx_it, stripe_idx_it + num_stripes, [&](auto stripe_idx) { + std::for_each(col_idx_it, col_idx_it + num_columns, [&](auto col_idx) { + auto& chunk = chunks[stripe_idx][col_idx]; + chunk.column_data_base = out_buffers[col_idx].data(); + chunk.valid_map_base = out_buffers[col_idx].null_mask(); + }); + }); + + // Allocate global dictionary for deserializing + rmm::device_uvector global_dict(num_dicts, stream); + + chunks.host_to_device_sync(stream); + gpu::DecodeNullsAndStringDictionaries( + chunks.base_device_ptr(), global_dict.data(), num_columns, 
num_stripes, skip_rows, stream); + + if (level > 0) { + // Update nullmasks for children if parent was a struct and had null mask + update_null_mask(chunks, out_buffers, stream, mr); + } + + auto const tz_table_dptr = table_device_view::create(tz_table, stream); + rmm::device_scalar error_count(0, stream); + // Update the null map for child columns + gpu::DecodeOrcColumnData(chunks.base_device_ptr(), + global_dict.data(), + row_groups, + num_columns, + num_stripes, + skip_rows, + *tz_table_dptr, + row_groups.size().first, + row_index_stride, + level, + error_count.data(), + stream); + chunks.device_to_host_async(stream); + // `value` synchronizes + auto const num_errors = error_count.value(stream); + CUDF_EXPECTS(num_errors == 0, "ORC data decode failed"); + + std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) { + out_buffers[col_idx].null_count() = + std::accumulate(stripe_idx_it + 0, + stripe_idx_it + num_stripes, + 0, + [&](auto null_count, auto const stripe_idx) { + return null_count + chunks[stripe_idx][col_idx].null_count; + }); + }); +} + +/** + * @brief Compute the per-stripe prefix sum of null count, for each struct column in the current + * layer. + */ +void scan_null_counts(cudf::detail::hostdevice_2dvector const& chunks, + cudf::host_span> prefix_sums, + rmm::cuda_stream_view stream) +{ + auto const num_stripes = chunks.size().first; + if (num_stripes == 0) return; + + auto const num_columns = chunks.size().second; + std::vector>> prefix_sums_to_update; + for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) { + // Null counts sums are only needed for children of struct columns + if (chunks[0][col_idx].type_kind == STRUCT) { + prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); + } + } + auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( + prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); + + thrust::for_each(rmm::exec_policy(stream), + d_prefix_sums_to_update.begin(), + d_prefix_sums_to_update.end(), + [chunks = cudf::detail::device_2dspan{chunks}] __device__( + auto const& idx_psums) { + auto const col_idx = idx_psums.first; + auto const psums = idx_psums.second; + + thrust::transform( + thrust::seq, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + psums.size(), + psums.begin(), + [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; }); + + thrust::inclusive_scan(thrust::seq, psums.begin(), psums.end(), psums.begin()); + }); + // `prefix_sums_to_update` goes out of scope, copy has to be done before we return + stream.synchronize(); +} + + +// TODO: this is called for each chunk of stripes. +/** + * @brief Aggregate child metadata from parent column chunks. + */ +void aggregate_child_meta(std::size_t level, + cudf::io::orc::detail::column_hierarchy const& selected_columns, + cudf::detail::host_2dspan chunks, + cudf::detail::host_2dspan row_groups, + host_span nested_cols, + host_span out_buffers, + reader_column_meta& col_meta) +{ + auto const num_of_stripes = chunks.size().first; + auto const num_of_rowgroups = row_groups.size().first; + auto const num_child_cols = selected_columns.levels[level + 1].size(); + auto const number_of_child_chunks = num_child_cols * num_of_stripes; + auto& num_child_rows = col_meta.num_child_rows; + auto& parent_column_data = col_meta.parent_column_data; + + // Reset the meta to store child column details. 
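+  // Layout note: the per-(stripe, child) vectors below are stored flat in
+  // stripe-major order; with S stripes and C child columns, the element for
+  // (stripe_id, child_col_idx) lives at
+  //
+  //   stripe_id * C + child_col_idx   // 0 <= index < S * C
+  //
+  // matching the host_2dspan views created below.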
+ num_child_rows.resize(selected_columns.levels[level + 1].size()); + std::fill(num_child_rows.begin(), num_child_rows.end(), 0); + parent_column_data.resize(number_of_child_chunks); + col_meta.parent_column_index.resize(number_of_child_chunks); + col_meta.child_start_row.resize(number_of_child_chunks); + col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks); + col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols); + + auto child_start_row = cudf::detail::host_2dspan( + col_meta.child_start_row.data(), num_of_stripes, num_child_cols); + auto num_child_rows_per_stripe = cudf::detail::host_2dspan( + col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols); + auto rwgrp_meta = cudf::detail::host_2dspan( + col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols); + + int index = 0; // number of child column processed + + // For each parent column, update its child column meta for each stripe. + std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { + auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; + auto start_row = 0; + auto processed_row_groups = 0; + + for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { + // Aggregate num_rows and start_row from processed parent columns per row groups + if (num_of_rowgroups) { + auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; + auto processed_child_rows = 0; + + for (std::size_t rowgroup_id = 0; rowgroup_id < stripe_num_row_groups; + rowgroup_id++, processed_row_groups++) { + auto const child_rows = row_groups[processed_row_groups][parent_col_idx].num_child_rows; + for (size_type id = 0; id < p_col.num_children; id++) { + auto const child_col_idx = index + id; + rwgrp_meta[processed_row_groups][child_col_idx].start_row = processed_child_rows; + rwgrp_meta[processed_row_groups][child_col_idx].num_rows = child_rows; + } + processed_child_rows += child_rows; + } + } + + // Aggregate start row, number of rows per chunk and total number of rows in a column + auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; + for (size_type id = 0; id < p_col.num_children; id++) { + auto const child_col_idx = index + id; + + // TODO: Check for overflow here. + num_child_rows[child_col_idx] += child_rows; + num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; + // start row could be different for each column when there is nesting at each stripe level + child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; + } + start_row += child_rows; + } + + // Parent column null mask and null count would be required for child column + // to adjust its nullmask. 
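+    // For example (values illustrative only): with 10 parent rows, a STRUCT
+    // child also gets num_child_rows == 10 and shares the parent's null mask in
+    // the loop below, while a LIST child's total row count is the sum of
+    // chunk.num_child_rows accumulated over stripes in the loop above.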
+    auto type              = out_buffers[parent_col_idx].type.id();
+    auto parent_null_count = static_cast<uint32_t>(out_buffers[parent_col_idx].null_count());
+    auto parent_valid_map  = out_buffers[parent_col_idx].null_mask();
+    auto num_rows          = out_buffers[parent_col_idx].size;
+
+    for (size_type id = 0; id < p_col.num_children; id++) {
+      auto const child_col_idx                    = index + id;
+      col_meta.parent_column_index[child_col_idx] = parent_col_idx;
+      if (type == type_id::STRUCT) {
+        parent_column_data[child_col_idx] = {parent_valid_map, parent_null_count};
+        // The number of rows in the child will remain the same as in the parent
+        // in case of a struct column.
+        num_child_rows[child_col_idx] = num_rows;
+      } else {
+        parent_column_data[child_col_idx] = {nullptr, 0};
+      }
+    }
+    index += p_col.num_children;
+  });
+}
+
+/**
+ * @brief Struct to store buffer data and the size of a list buffer.
+ */
+struct list_buffer_data {
+  size_type* data;
+  size_type size;
+};
+
+// Generates offsets for a list buffer from the number of elements in each row.
+void generate_offsets_for_list(host_span<list_buffer_data> buff_data, rmm::cuda_stream_view stream)
+{
+  for (auto& list_data : buff_data) {
+    thrust::exclusive_scan(rmm::exec_policy_nosync(stream),
+                           list_data.data,
+                           list_data.data + list_data.size,
+                           list_data.data);
+  }
+}
+
+}  // namespace
+
+void reader::impl::prepare_data(uint64_t skip_rows,
+                                std::optional<size_type> const& num_rows_opt,
+                                std::vector<std::vector<size_type>> const& stripes)
+{
+  // Selected columns at different levels of nesting are stored in different elements
+  // of `selected_columns`; thus, size == 1 means no nested columns.
+  CUDF_EXPECTS(skip_rows == 0 or _selected_columns.num_levels() == 1,
+               "skip_rows is not supported by nested columns");
+
+  // There are no columns in the table
+  if (_selected_columns.num_levels() == 0) { return; }
+
+  global_preprocess(skip_rows, num_rows_opt, stripes);
+
+  if (_file_itm_data.has_no_data()) { return; }
+
+  // TODO: fix this; read_data should be called only once.
+  while (_chunk_read_data.more_stripe_to_load()) {
+    read_data();
+  }
+
+  // TODO: fix this; subpass_preprocess should be called only once.
+  _chunk_read_data.curr_load_stripe_chunk = 0;
+  while (_chunk_read_data.more_stripe_to_load()) {
+    subpass_preprocess();
+  }
+
+  auto const rows_to_skip      = _file_itm_data.rows_to_skip;
+  auto const rows_to_read      = _file_itm_data.rows_to_read;
+  auto const& selected_stripes = _file_itm_data.selected_stripes;
+
+  // Set up table for converting timestamp columns from local to UTC time
+  auto const tz_table = [&, &selected_stripes = selected_stripes] {
+    auto const has_timestamp_column = std::any_of(
+      _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) {
+        return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) {
+          return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP;
+        });
+      });
+
+    return has_timestamp_column ? cudf::detail::make_timezone_transition_table(
+                                    {}, selected_stripes[0].stripe_footer->writerTimezone, _stream)
+                                : std::make_unique<cudf::table>();
+  }();
+
+  auto& lvl_stripe_data        = _file_itm_data.lvl_stripe_data;
+  auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums;
+  auto& lvl_chunks             = _file_itm_data.lvl_data_chunks;
+
+  // TODO: move this to the global preprocessing step.
+  lvl_chunks.resize(_selected_columns.num_levels());
+  _out_buffers.resize(_selected_columns.num_levels());
+
+  // TODO: move everything below into the decompression and decode steps.
+  std::size_t num_stripes = selected_stripes.size();
+
+  // Iterate through levels of nested columns; a child column will be one level down
+  // from its parent column.
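+  // For example, a selected column of type LIST<STRUCT<INT, STRING>> spans
+  // three levels: level 0 holds the LIST, level 1 the STRUCT, and level 2 the
+  // INT and STRING leaves; each iteration consumes the child metadata
+  // aggregated while decoding the previous level.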
+ auto& col_meta = *_col_meta; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + auto& columns_level = _selected_columns.levels[level]; + + // TODO: do it in global step + // Association between each ORC column and its cudf::column + std::vector nested_cols; + + // Get a list of column data types + std::vector column_types; + for (auto& col : columns_level) { + auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, + _config.use_np_dtypes, + _config.timestamp_type.id(), + to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); + CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); + if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or + col_type == type_id::DECIMAL128) { + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto const scale = + -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); + column_types.emplace_back(col_type, scale); + } else { + column_types.emplace_back(col_type); + } + + // Map each ORC column to its column + if (col_type == type_id::LIST or col_type == type_id::STRUCT) { + nested_cols.emplace_back(col); + } + } + + auto const num_columns = columns_level.size(); + auto& chunks = lvl_chunks[level]; + chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + + const bool use_index = + _config.use_index && + // Do stripes have row group index + _metadata.is_row_grp_idx_present() && + // Only use if we don't have much work with complete columns & stripes + // TODO: Consider nrows, gpu, and tune the threshold + (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && + _metadata.get_row_index_stride() > 0 && num_columns * num_stripes < 8 * 128) && + // Only use if first row is aligned to a stripe boundary + // TODO: Fix logic to handle unaligned rows + (rows_to_skip == 0); + + // Logically view streams as columns + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; + + null_count_prefix_sums.emplace_back(); + null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); + std::generate_n(std::back_inserter(null_count_prefix_sums.back()), + _selected_columns.levels[level].size(), + [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + num_stripes, _stream, rmm::mr::get_current_device_resource()); + }); + + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; + + std::size_t stripe_start_row = 0; + std::size_t num_dict_entries = 0; + std::size_t num_rowgroups = 0; + + // TODO: Stripe and stream idx must be by chunk. 
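+    // A sketch of the chunked form this TODO points at (names hypothetical):
+    // each pass would walk only the stripes of the current chunk, e.g.
+    //
+    //   auto const [stripe_begin, stripe_end] = current_stripe_chunk;  // hypothetical
+    //   for (std::size_t stripe_idx = stripe_begin; stripe_idx < stripe_end; ++stripe_idx) {
+    //     ...
+    //   }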
+ std::size_t stripe_idx = 0; + std::size_t stream_idx = 0; + + // std::vector, std::size_t>> read_tasks; + for (auto const& stripe : selected_stripes) { + auto const stripe_info = stripe.stripe_info; + auto const stripe_footer = stripe.stripe_footer; + + auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + &stream_idx, + std::nullopt, // stream_info + &chunks); + + auto const is_stripe_data_empty = total_data_size == 0; + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + auto dst_base = static_cast(stripe_data[stripe_idx].data()); + + auto const num_rows_per_stripe = stripe_info->numberOfRows; + auto const rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; + if (use_index) { + stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + _metadata.get_row_index_stride(); + } + // Update chunks to reference streams pointers + for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { + auto& chunk = chunks[stripe_idx][col_idx]; + // start row, number of rows in a each stripe and total number of rows + // may change in lower levels of nesting + chunk.start_row = (level == 0) + ? stripe_start_row + : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; + chunk.num_rows = (level == 0) + ? stripe_info->numberOfRows + : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; + chunk.parent_validity_info = + (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; + chunk.parent_null_count_prefix_sums = + (level == 0) + ? nullptr + : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); + chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; + chunk.type_kind = + _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; + // num_child_rows for a struct column will be same, for other nested types it will be + // calculated. + chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; + chunk.dtype_id = column_types[col_idx].id(); + chunk.decimal_scale = _metadata.per_file_metadata[stripe.source_idx] + .ff.types[columns_level[col_idx].id] + .scale.value_or(0); + + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) + ? 
                               sizeof(size_type)
+                             : cudf::size_of(column_types[col_idx]);
+        chunk.num_rowgroups = stripe_num_rowgroups;
+        if (chunk.type_kind == orc::TIMESTAMP) {
+          chunk.timestamp_type_id = _config.timestamp_type.id();
+        }
+        if (not is_stripe_data_empty) {
+          for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) {
+            chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos;
+          }
+        }
+      }
+      stripe_start_row += num_rows_per_stripe;
+      num_rowgroups += stripe_num_rowgroups;
+
+      stripe_idx++;
+    }
+
+    if (stripe_data.empty()) { continue; }
+
+    // Process dataset chunk pages into output columns
+    auto row_groups =
+      cudf::detail::hostdevice_2dvector<gpu::RowGroup>(num_rowgroups, num_columns, _stream);
+    if (level > 0 and row_groups.size().first) {
+      cudf::host_span<gpu::RowGroup> row_groups_span(row_groups.base_host_ptr(),
+                                                     num_rowgroups * num_columns);
+      auto& rw_grp_meta = col_meta.rwgrp_meta;
+
+      // Update start row and num rows per row group
+      std::transform(rw_grp_meta.begin(),
+                     rw_grp_meta.end(),
+                     row_groups_span.begin(),
+                     rw_grp_meta.begin(),
+                     [&](auto meta, auto& row_grp) {
+                       row_grp.num_rows  = meta.num_rows;
+                       row_grp.start_row = meta.start_row;
+                       return meta;
+                     });
+    }
+    // Setup row group descriptors if using indexes
+    if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
+      auto decomp_data = decompress_stripe_data(_file_itm_data.compinfo_map,
+                                                *_metadata.per_file_metadata[0].decompressor,
+                                                stripe_data,
+                                                stream_info,
+                                                chunks,
+                                                row_groups,
+                                                num_stripes,
+                                                _metadata.get_row_index_stride(),
+                                                level == 0,
+                                                _stream);
+      stripe_data.clear();
+      stripe_data.push_back(std::move(decomp_data));
+    } else {
+      if (row_groups.size().first) {
+        chunks.host_to_device_async(_stream);
+        row_groups.host_to_device_async(_stream);
+        gpu::ParseRowGroupIndex(row_groups.base_device_ptr(),
+                                nullptr,
+                                chunks.base_device_ptr(),
+                                num_columns,
+                                num_stripes,
+                                num_rowgroups,
+                                _metadata.get_row_index_stride(),
+                                level == 0,
+                                _stream);
+      }
+    }
+
+    for (std::size_t i = 0; i < column_types.size(); ++i) {
+      bool is_nullable = false;
+      for (std::size_t j = 0; j < num_stripes; ++j) {
+        if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) {
+          is_nullable = true;
+          break;
+        }
+      }
+      auto is_list_type = (column_types[i].id() == type_id::LIST);
+      auto n_rows       = (level == 0) ?
rows_to_read : col_meta.num_child_rows[i]; + // For list column, offset column will be always size + 1 + if (is_list_type) n_rows++; + _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); + } + + decode_stream_data(num_dict_entries, + rows_to_skip, + _metadata.get_row_index_stride(), + level, + tz_table->view(), + chunks, + row_groups, + _out_buffers[level], + _stream, + _mr); + + if (nested_cols.size()) { + // Extract information to process nested child columns + scan_null_counts(chunks, null_count_prefix_sums[level], _stream); + + row_groups.device_to_host_sync(_stream); + aggregate_child_meta( + level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); + + // ORC stores number of elements at each row, so we need to generate offsets from that + std::vector buff_data; + std::for_each( + _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { + if (out_buffer.type.id() == type_id::LIST) { + auto data = static_cast(out_buffer.data()); + buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); + } + }); + + if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } + } + } // end loop level +} + + reader::impl::impl(std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu deleted file mode 100644 index 90404c7b9ca..00000000000 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ /dev/null @@ -1,980 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// #define PRINT_DEBUG - -#include "reader_impl.hpp" -#include "reader_impl_chunking.hpp" -#include "reader_impl_helpers.hpp" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cudf::io::orc::detail { - -namespace { - -// TODO: update -/** - * @brief Decompresses the stripe data, at stream granularity. 
- * - * @param decompressor Block decompressor - * @param stripe_data List of source stripe column data - * @param stream_info List of stream to column mappings - * @param chunks Vector of list of column chunk descriptors - * @param row_groups Vector of list of row index descriptors - * @param num_stripes Number of stripes making up column chunks - * @param row_index_stride Distance between each row index - * @param use_base_stride Whether to use base stride obtained from meta or use the computed value - * @param stream CUDA stream used for device memory operations and kernel launches - * @return Device buffer to decompressed page data - */ -rmm::device_buffer decompress_stripe_data( - stream_id_map const& compinfo_map, - OrcDecompressor const& decompressor, - host_span stripe_data, - host_span stream_info, - cudf::detail::hostdevice_2dvector& chunks, - cudf::detail::hostdevice_2dvector& row_groups, - std::size_t num_stripes, - std::size_t row_index_stride, - bool use_base_stride, - rmm::cuda_stream_view stream) -{ - // Count the exact number of compressed blocks - std::size_t num_compressed_blocks = 0; - std::size_t num_uncompressed_blocks = 0; - std::size_t total_decomp_size = 0; - - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), stream); - - for (auto const& info : stream_info) { -#ifdef PRINT_DEBUG - printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_cold_idx, - (int)info.id.kind, - info.dst_pos, - info.length); - fflush(stdout); -#endif - - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, - info.length)); - - // printf("line %d\n", __LINE__); - // fflush(stdout); - auto const& cached_comp_info = - compinfo_map.at(stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}); - // printf("line %d\n", __LINE__); - // fflush(stdout); - // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}]; - auto& stream_comp_info = compinfo[compinfo.size() - 1]; - stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; - stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; - stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; - - num_compressed_blocks += cached_comp_info.num_compressed_blocks; - num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; - total_decomp_size += cached_comp_info.total_decomp_size; - } - - CUDF_EXPECTS( - not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), - "Inconsistent info on compression blocks"); - -#ifdef XXX - std::size_t old_num_compressed_blocks = num_compressed_blocks; - std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks; - std::size_t old_total_decomp_size = total_decomp_size; - - num_compressed_blocks = 0; - num_uncompressed_blocks = 0; - total_decomp_size = 0; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - num_compressed_blocks += compinfo[i].num_compressed_blocks; - num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; - total_decomp_size += compinfo[i].max_uncompressed_size; - - auto const& info = stream_info[i]; - printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_cold_idx, - (int)info.id.kind, - (size_t)compinfo[i].num_compressed_blocks, - 
(size_t)compinfo[i].num_uncompressed_blocks, - compinfo[i].max_uncompressed_size); - fflush(stdout); - } - - if (old_num_compressed_blocks != num_compressed_blocks || - old_num_uncompressed_blocks != num_uncompressed_blocks || - old_total_decomp_size != total_decomp_size) { - printf("invalid: %d - %d, %d - %d, %d - %d\n", - (int)old_num_compressed_blocks, - (int)num_compressed_blocks, - (int)old_num_uncompressed_blocks, - (int)num_uncompressed_blocks, - (int)old_total_decomp_size, - (int)total_decomp_size - - ); - } -#endif - - // Buffer needs to be padded. - // Required by `gpuDecodeOrcColumnData`. - rmm::device_buffer decomp_data( - cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); - if (decomp_data.is_empty()) { return decomp_data; } - - rmm::device_uvector> inflate_in( - num_compressed_blocks + num_uncompressed_blocks, stream); - rmm::device_uvector> inflate_out( - num_compressed_blocks + num_uncompressed_blocks, stream); - rmm::device_uvector inflate_res(num_compressed_blocks, stream); - thrust::fill(rmm::exec_policy(stream), - inflate_res.begin(), - inflate_res.end(), - compression_result{0, compression_status::FAILURE}); - - // Parse again to populate the decompression input/output buffers - std::size_t decomp_offset = 0; - uint32_t max_uncomp_block_size = 0; - uint32_t start_pos = 0; - auto start_pos_uncomp = (uint32_t)num_compressed_blocks; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - auto dst_base = static_cast(decomp_data.data()); - compinfo[i].uncompressed_data = dst_base + decomp_offset; - compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; - compinfo[i].dec_out_ctl = inflate_out.data() + start_pos; - compinfo[i].dec_res = {inflate_res.data() + start_pos, compinfo[i].num_compressed_blocks}; - compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; - compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; - - // stream_info[i].dst_pos = decomp_offset; - decomp_offset += compinfo[i].max_uncompressed_size; - start_pos += compinfo[i].num_compressed_blocks; - start_pos_uncomp += compinfo[i].num_uncompressed_blocks; - max_uncomp_block_size = - std::max(max_uncomp_block_size, compinfo[i].max_uncompressed_block_size); - } - compinfo.host_to_device_async(stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - stream); - - // Value for checking whether we decompress successfully. - // It doesn't need to be atomic as there is no race condition: we only write `true` if needed. 
- cudf::detail::hostdevice_vector any_block_failure(1, stream); - any_block_failure[0] = false; - any_block_failure.host_to_device_async(stream); - - // Dispatch batches of blocks to decompress - if (num_compressed_blocks > 0) { - device_span> inflate_in_view{inflate_in.data(), - num_compressed_blocks}; - device_span> inflate_out_view{inflate_out.data(), num_compressed_blocks}; - switch (decompressor.compression()) { - case compression_type::ZLIB: - if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) { - gpuinflate( - inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream); - } else { - nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - } - break; - case compression_type::SNAPPY: - if (nvcomp::is_decompression_disabled(nvcomp::compression_type::SNAPPY)) { - gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream); - } else { - nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - } - break; - case compression_type::ZSTD: - if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD); - reason) { - CUDF_FAIL("Decompression error: " + reason.value()); - } - nvcomp::batched_decompress(nvcomp::compression_type::ZSTD, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - break; - default: CUDF_FAIL("Unexpected decompression dispatch"); break; - } - - // TODO: proclam return type - - // Check if any block has been failed to decompress. - // Not using `thrust::any` or `thrust::count_if` to defer stream sync. - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(std::size_t{0}), - thrust::make_counting_iterator(inflate_res.size()), - [results = inflate_res.begin(), - any_block_failure = any_block_failure.device_ptr()] __device__(auto const idx) { - if (results[idx].status != compression_status::SUCCESS) { *any_block_failure = true; } - }); - } - - if (num_uncompressed_blocks > 0) { - device_span> copy_in_view{inflate_in.data() + num_compressed_blocks, - num_uncompressed_blocks}; - device_span> copy_out_view{inflate_out.data() + num_compressed_blocks, - num_uncompressed_blocks}; - gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); - } - - // Copy without stream sync, thus need to wait for stream sync below to access. - any_block_failure.device_to_host_async(stream); - - gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream); - compinfo.device_to_host_sync(stream); // This also sync stream for `any_block_failure`. - - // We can check on host after stream synchronize - CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); - - auto const num_columns = chunks.size().second; - - // Update the stream information with the updated uncompressed info - // TBD: We could update the value from the information we already - // have in stream_info[], but using the gpu results also updates - // max_uncompressed_size to the actual uncompressed size, or zero if - // decompression failed. 
- for (std::size_t i = 0; i < num_stripes; ++i) { - for (std::size_t j = 0; j < num_columns; ++j) { - auto& chunk = chunks[i][j]; - for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { - if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { - chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; - chunk.strm_len[k] = compinfo[chunk.strm_id[k]].max_uncompressed_size; - } - } - } - } - - if (row_groups.size().first) { - chunks.host_to_device_async(stream); - row_groups.host_to_device_async(stream); - gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), - compinfo.device_ptr(), - chunks.base_device_ptr(), - num_columns, - num_stripes, - row_groups.size().first, - row_index_stride, - use_base_stride, - stream); - } - - return decomp_data; -} - -/** - * @brief Updates null mask of columns whose parent is a struct column. - * - * If struct column has null element, that row would be skipped while writing child column in ORC, - * so we need to insert the missing null elements in child column. There is another behavior from - * pyspark, where if the child column doesn't have any null elements, it will not have present - * stream, so in that case parent null mask need to be copied to child column. - * - * @param chunks Vector of list of column chunk descriptors - * @param out_buffers Output columns' device buffers - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to use for device memory allocation - */ -void update_null_mask(cudf::detail::hostdevice_2dvector& chunks, - host_span out_buffers, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_stripes = chunks.size().first; - auto const num_columns = chunks.size().second; - bool is_mask_updated = false; - - for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { - if (chunks[0][col_idx].parent_validity_info.valid_map_base != nullptr) { - if (not is_mask_updated) { - chunks.device_to_host_sync(stream); - is_mask_updated = true; - } - - auto parent_valid_map_base = chunks[0][col_idx].parent_validity_info.valid_map_base; - auto child_valid_map_base = out_buffers[col_idx].null_mask(); - auto child_mask_len = - chunks[0][col_idx].column_num_rows - chunks[0][col_idx].parent_validity_info.null_count; - auto parent_mask_len = chunks[0][col_idx].column_num_rows; - - if (child_valid_map_base != nullptr) { - rmm::device_uvector dst_idx(child_mask_len, stream); - // Copy indexes at which the parent has valid value. 
- thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + parent_mask_len, - dst_idx.begin(), - [parent_valid_map_base] __device__(auto idx) { - return bit_is_set(parent_valid_map_base, idx); - }); - - auto merged_null_mask = cudf::detail::create_null_mask( - parent_mask_len, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); - auto merged_mask = static_cast(merged_null_mask.data()); - uint32_t* dst_idx_ptr = dst_idx.data(); - // Copy child valid bits from child column to valid indexes, this will merge both child - // and parent null masks - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + dst_idx.size(), - [child_valid_map_base, dst_idx_ptr, merged_mask] __device__(auto idx) { - if (bit_is_set(child_valid_map_base, idx)) { - cudf::set_bit(merged_mask, dst_idx_ptr[idx]); - }; - }); - - out_buffers[col_idx].set_null_mask(std::move(merged_null_mask)); - - } else { - // Since child column doesn't have a mask, copy parent null mask - auto mask_size = bitmask_allocation_size_bytes(parent_mask_len); - out_buffers[col_idx].set_null_mask( - rmm::device_buffer(static_cast(parent_valid_map_base), mask_size, stream, mr)); - } - } - } - - if (is_mask_updated) { - // Update chunks with pointers to column data which might have been changed. - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { - for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { - auto& chunk = chunks[stripe_idx][col_idx]; - chunk.valid_map_base = out_buffers[col_idx].null_mask(); - } - } - chunks.host_to_device_sync(stream); - } -} - -/** - * @brief Converts the stripe column data and outputs to columns. - * - * @param num_dicts Number of dictionary entries required - * @param skip_rows Number of rows to offset from start - * @param row_index_stride Distance between each row index - * @param level Current nesting level being processed - * @param tz_table Local time to UTC conversion table - * @param chunks Vector of list of column chunk descriptors - * @param row_groups Vector of list of row index descriptors - * @param out_buffers Output columns' device buffers - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation - */ -void decode_stream_data(std::size_t num_dicts, - std::size_t skip_rows, - std::size_t row_index_stride, - std::size_t level, - table_view const& tz_table, - cudf::detail::hostdevice_2dvector& chunks, - cudf::detail::device_2dspan row_groups, - std::vector& out_buffers, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_stripes = chunks.size().first; - auto const num_columns = chunks.size().second; - thrust::counting_iterator col_idx_it(0); - thrust::counting_iterator stripe_idx_it(0); - - // Update chunks with pointers to column data - std::for_each(stripe_idx_it, stripe_idx_it + num_stripes, [&](auto stripe_idx) { - std::for_each(col_idx_it, col_idx_it + num_columns, [&](auto col_idx) { - auto& chunk = chunks[stripe_idx][col_idx]; - chunk.column_data_base = out_buffers[col_idx].data(); - chunk.valid_map_base = out_buffers[col_idx].null_mask(); - }); - }); - - // Allocate global dictionary for deserializing - rmm::device_uvector global_dict(num_dicts, stream); - - chunks.host_to_device_sync(stream); - gpu::DecodeNullsAndStringDictionaries( - chunks.base_device_ptr(), global_dict.data(), num_columns, 
num_stripes, skip_rows, stream); - - if (level > 0) { - // Update nullmasks for children if parent was a struct and had null mask - update_null_mask(chunks, out_buffers, stream, mr); - } - - auto const tz_table_dptr = table_device_view::create(tz_table, stream); - rmm::device_scalar error_count(0, stream); - // Update the null map for child columns - gpu::DecodeOrcColumnData(chunks.base_device_ptr(), - global_dict.data(), - row_groups, - num_columns, - num_stripes, - skip_rows, - *tz_table_dptr, - row_groups.size().first, - row_index_stride, - level, - error_count.data(), - stream); - chunks.device_to_host_async(stream); - // `value` synchronizes - auto const num_errors = error_count.value(stream); - CUDF_EXPECTS(num_errors == 0, "ORC data decode failed"); - - std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) { - out_buffers[col_idx].null_count() = - std::accumulate(stripe_idx_it + 0, - stripe_idx_it + num_stripes, - 0, - [&](auto null_count, auto const stripe_idx) { - return null_count + chunks[stripe_idx][col_idx].null_count; - }); - }); -} - -/** - * @brief Compute the per-stripe prefix sum of null count, for each struct column in the current - * layer. - */ -void scan_null_counts(cudf::detail::hostdevice_2dvector const& chunks, - cudf::host_span> prefix_sums, - rmm::cuda_stream_view stream) -{ - auto const num_stripes = chunks.size().first; - if (num_stripes == 0) return; - - auto const num_columns = chunks.size().second; - std::vector>> prefix_sums_to_update; - for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) { - // Null counts sums are only needed for children of struct columns - if (chunks[0][col_idx].type_kind == STRUCT) { - prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); - } - } - auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( - prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); - - thrust::for_each(rmm::exec_policy(stream), - d_prefix_sums_to_update.begin(), - d_prefix_sums_to_update.end(), - [chunks = cudf::detail::device_2dspan{chunks}] __device__( - auto const& idx_psums) { - auto const col_idx = idx_psums.first; - auto const psums = idx_psums.second; - - thrust::transform( - thrust::seq, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + psums.size(), - psums.begin(), - [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; }); - - thrust::inclusive_scan(thrust::seq, psums.begin(), psums.end(), psums.begin()); - }); - // `prefix_sums_to_update` goes out of scope, copy has to be done before we return - stream.synchronize(); -} - - -// TODO: this is called for each chunk of stripes. -/** - * @brief Aggregate child metadata from parent column chunks. - */ -void aggregate_child_meta(std::size_t level, - cudf::io::orc::detail::column_hierarchy const& selected_columns, - cudf::detail::host_2dspan chunks, - cudf::detail::host_2dspan row_groups, - host_span nested_cols, - host_span out_buffers, - reader_column_meta& col_meta) -{ - auto const num_of_stripes = chunks.size().first; - auto const num_of_rowgroups = row_groups.size().first; - auto const num_child_cols = selected_columns.levels[level + 1].size(); - auto const number_of_child_chunks = num_child_cols * num_of_stripes; - auto& num_child_rows = col_meta.num_child_rows; - auto& parent_column_data = col_meta.parent_column_data; - - // Reset the meta to store child column details. 
- num_child_rows.resize(selected_columns.levels[level + 1].size()); - std::fill(num_child_rows.begin(), num_child_rows.end(), 0); - parent_column_data.resize(number_of_child_chunks); - col_meta.parent_column_index.resize(number_of_child_chunks); - col_meta.child_start_row.resize(number_of_child_chunks); - col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks); - col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols); - - auto child_start_row = cudf::detail::host_2dspan( - col_meta.child_start_row.data(), num_of_stripes, num_child_cols); - auto num_child_rows_per_stripe = cudf::detail::host_2dspan( - col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols); - auto rwgrp_meta = cudf::detail::host_2dspan( - col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols); - - int index = 0; // number of child column processed - - // For each parent column, update its child column meta for each stripe. - std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { - auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; - auto start_row = 0; - auto processed_row_groups = 0; - - for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { - // Aggregate num_rows and start_row from processed parent columns per row groups - if (num_of_rowgroups) { - auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; - auto processed_child_rows = 0; - - for (std::size_t rowgroup_id = 0; rowgroup_id < stripe_num_row_groups; - rowgroup_id++, processed_row_groups++) { - auto const child_rows = row_groups[processed_row_groups][parent_col_idx].num_child_rows; - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - rwgrp_meta[processed_row_groups][child_col_idx].start_row = processed_child_rows; - rwgrp_meta[processed_row_groups][child_col_idx].num_rows = child_rows; - } - processed_child_rows += child_rows; - } - } - - // Aggregate start row, number of rows per chunk and total number of rows in a column - auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - - // TODO: Check for overflow here. - num_child_rows[child_col_idx] += child_rows; - num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; - // start row could be different for each column when there is nesting at each stripe level - child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; - } - start_row += child_rows; - } - - // Parent column null mask and null count would be required for child column - // to adjust its nullmask. 
- auto type = out_buffers[parent_col_idx].type.id(); - auto parent_null_count = static_cast(out_buffers[parent_col_idx].null_count()); - auto parent_valid_map = out_buffers[parent_col_idx].null_mask(); - auto num_rows = out_buffers[parent_col_idx].size; - - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - col_meta.parent_column_index[child_col_idx] = parent_col_idx; - if (type == type_id::STRUCT) { - parent_column_data[child_col_idx] = {parent_valid_map, parent_null_count}; - // Number of rows in child will remain same as parent in case of struct column - num_child_rows[child_col_idx] = num_rows; - } else { - parent_column_data[child_col_idx] = {nullptr, 0}; - } - } - index += p_col.num_children; - }); -} - -/** - * @brief struct to store buffer data and size of list buffer - */ -struct list_buffer_data { - size_type* data; - size_type size; -}; - -// Generates offsets for list buffer from number of elements in a row. -void generate_offsets_for_list(host_span buff_data, rmm::cuda_stream_view stream) -{ - for (auto& list_data : buff_data) { - thrust::exclusive_scan(rmm::exec_policy_nosync(stream), - list_data.data, - list_data.data + list_data.size, - list_data.data); - } -} - -} // namespace - -void reader::impl::prepare_data(uint64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes) -{ - // Selected columns at different levels of nesting are stored in different elements - // of `selected_columns`; thus, size == 1 means no nested columns - CUDF_EXPECTS(skip_rows == 0 or _selected_columns.num_levels() == 1, - "skip_rows is not supported by nested columns"); - - // There are no columns in the table - if (_selected_columns.num_levels() == 0) { return; } - - global_preprocess(skip_rows, num_rows_opt, stripes); - - if (_file_itm_data.has_no_data()) { return; } - - // TODO: fix this, should be called once - while (_chunk_read_data.more_stripe_to_load()) { - read_data(); - } - - // Fix this, subpass should be call once - _chunk_read_data.curr_load_stripe_chunk = 0; - while (_chunk_read_data.more_stripe_to_load()) { - subpass_preprocess(); - } - - auto const rows_to_skip = _file_itm_data.rows_to_skip; - auto const rows_to_read = _file_itm_data.rows_to_read; - auto const& selected_stripes = _file_itm_data.selected_stripes; - - // Set up table for converting timestamp columns from local to UTC time - auto const tz_table = [&, &selected_stripes = selected_stripes] { - auto const has_timestamp_column = std::any_of( - _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { - return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { - return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; - }); - }); - - return has_timestamp_column ? cudf::detail::make_timezone_transition_table( - {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) - : std::make_unique(); - }(); - - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; - auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - - // TODO: move this to global step - lvl_chunks.resize(_selected_columns.num_levels()); - _out_buffers.resize(_selected_columns.num_levels()); - - -// -// -// -// TODO: move this to reader_impl.cu, decomp and decode step - std::size_t num_stripes = selected_stripes.size(); - - // Iterates through levels of nested columns, child column will be one level down - // compared to parent column. 
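// [Editor's note] Editor's toy model of the comment above (not from the
// patch): nested types place children one level below their parents, e.g. a
// LIST<STRUCT<INT>> column spans levels 0 (LIST), 1 (STRUCT), and 2 (INT),
// and each level consumes metadata produced while decoding the level above it.
#include <cstddef>
#include <vector>

// One entry per nesting level; entry i + 1 describes the children of entry i.
void process_levels(std::vector<std::size_t> const& cols_per_level)
{
  for (std::size_t level = 0; level < cols_per_level.size(); ++level) {
    // decode all columns at this level; their child row counts feed level + 1
  }
}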
- auto& col_meta = *_col_meta; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& columns_level = _selected_columns.levels[level]; - - // TODO: do it in global step - // Association between each ORC column and its cudf::column - std::vector nested_cols; - - // Get a list of column data types - std::vector column_types; - for (auto& col : columns_level) { - auto col_type = to_cudf_type(_metadata.get_col_type(col.id).kind, - _config.use_np_dtypes, - _config.timestamp_type.id(), - to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); - CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or - col_type == type_id::DECIMAL128) { - // sign of the scale is changed since cuDF follows c++ libraries like CNL - // which uses negative scaling, but liborc and other libraries - // follow positive scaling. - auto const scale = - -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types.emplace_back(col_type, scale); - } else { - column_types.emplace_back(col_type); - } - - // Map each ORC column to its column - if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols.emplace_back(col); - } - } - - auto const num_columns = columns_level.size(); - auto& chunks = lvl_chunks[level]; - chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - - const bool use_index = - _config.use_index && - // Do stripes have row group index - _metadata.is_row_grp_idx_present() && - // Only use if we don't have much work with complete columns & stripes - // TODO: Consider nrows, gpu, and tune the threshold - (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() > 0 && num_columns * num_stripes < 8 * 128) && - // Only use if first row is aligned to a stripe boundary - // TODO: Fix logic to handle unaligned rows - (rows_to_skip == 0); - - // Logically view streams as columns - auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - num_stripes, _stream, rmm::mr::get_current_device_resource()); - }); - - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; - - std::size_t stripe_start_row = 0; - std::size_t num_dict_entries = 0; - std::size_t num_rowgroups = 0; - - // TODO: Stripe and stream idx must be by chunk. 
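// [Editor's note] Hedged sketch of what the TODO above suggests: keep
// stripe/stream indices relative to the current chunk and translate back when
// touching global arrays. The {start_idx, count} shape mirrors the patch;
// to_global is a hypothetical helper.
#include <cstddef>

struct stripe_range {
  std::size_t start_idx;  // first stripe of the chunk, in file order
  std::size_t count;      // number of stripes in the chunk
};

// Translate a chunk-local stripe index into a global stripe index.
constexpr std::size_t to_global(stripe_range r, std::size_t local_idx)
{
  return r.start_idx + local_idx;  // caller guarantees local_idx < r.count
}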
- std::size_t stripe_idx = 0; - std::size_t stream_idx = 0; - - // std::vector, std::size_t>> read_tasks; - for (auto const& stripe : selected_stripes) { - auto const stripe_info = stripe.stripe_info; - auto const stripe_footer = stripe.stripe_footer; - - auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - &stream_idx, - std::nullopt, // stream_info - &chunks); - - auto const is_stripe_data_empty = total_data_size == 0; - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - auto dst_base = static_cast(stripe_data[stripe_idx].data()); - - auto const num_rows_per_stripe = stripe_info->numberOfRows; - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; - if (use_index) { - stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / - _metadata.get_row_index_stride(); - } - // Update chunks to reference streams pointers - for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx][col_idx]; - // start row, number of rows in a each stripe and total number of rows - // may change in lower levels of nesting - chunk.start_row = (level == 0) - ? stripe_start_row - : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; - chunk.num_rows = (level == 0) - ? stripe_info->numberOfRows - : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; - chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; - chunk.parent_validity_info = - (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; - chunk.parent_null_count_prefix_sums = - (level == 0) - ? nullptr - : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); - chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; - chunk.type_kind = - _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; - // num_child_rows for a struct column will be same, for other nested types it will be - // calculated. - chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; - chunk.dtype_id = column_types[col_idx].id(); - chunk.decimal_scale = _metadata.per_file_metadata[stripe.source_idx] - .ff.types[columns_level[col_idx].id] - .scale.value_or(0); - - chunk.rowgroup_id = rowgroup_id; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? sizeof(string_index_pair) - : ((column_types[col_idx].id() == type_id::LIST) or - (column_types[col_idx].id() == type_id::STRUCT)) - ? 
sizeof(size_type) - : cudf::size_of(column_types[col_idx]); - chunk.num_rowgroups = stripe_num_rowgroups; - if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _config.timestamp_type.id(); } - if (not is_stripe_data_empty) { - for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; - } - } - } - stripe_start_row += num_rows_per_stripe; - num_rowgroups += stripe_num_rowgroups; - - stripe_idx++; - } - - if (stripe_data.empty()) { continue; } - - // Process dataset chunk pages into output columns - auto row_groups = - cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); - if (level > 0 and row_groups.size().first) { - cudf::host_span row_groups_span(row_groups.base_host_ptr(), - num_rowgroups * num_columns); - auto& rw_grp_meta = col_meta.rwgrp_meta; - - // Update start row and num rows per row group - std::transform(rw_grp_meta.begin(), - rw_grp_meta.end(), - row_groups_span.begin(), - rw_grp_meta.begin(), - [&](auto meta, auto& row_grp) { - row_grp.num_rows = meta.num_rows; - row_grp.start_row = meta.start_row; - return meta; - }); - } - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = decompress_stripe_data(_file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - stripe_data.clear(); - stripe_data.push_back(std::move(decomp_data)); - } else { - if (row_groups.size().first) { - chunks.host_to_device_async(_stream); - row_groups.host_to_device_async(_stream); - row_groups.host_to_device_async(_stream); - gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), - nullptr, - chunks.base_device_ptr(), - num_columns, - num_stripes, - num_rowgroups, - _metadata.get_row_index_stride(), - level == 0, - _stream); - } - } - - for (std::size_t i = 0; i < column_types.size(); ++i) { - bool is_nullable = false; - for (std::size_t j = 0; j < num_stripes; ++j) { - if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { - is_nullable = true; - break; - } - } - auto is_list_type = (column_types[i].id() == type_id::LIST); - auto n_rows = (level == 0) ? 
rows_to_read : col_meta.num_child_rows[i]; - // For list column, offset column will be always size + 1 - if (is_list_type) n_rows++; - _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); - } - - decode_stream_data(num_dict_entries, - rows_to_skip, - _metadata.get_row_index_stride(), - level, - tz_table->view(), - chunks, - row_groups, - _out_buffers[level], - _stream, - _mr); - - if (nested_cols.size()) { - // Extract information to process nested child columns - scan_null_counts(chunks, null_count_prefix_sums[level], _stream); - - row_groups.device_to_host_sync(_stream); - aggregate_child_meta( - level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); - - // ORC stores number of elements at each row, so we need to generate offsets from that - std::vector buff_data; - std::for_each( - _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { - if (out_buffer.type.id() == type_id::LIST) { - auto data = static_cast(out_buffer.data()); - buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); - } - }); - - if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } - } - } // end loop level -} - -} // namespace cudf::io::orc::detail From de6b717c9e4d04d1e93917d72957ba237dc46328 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 11:13:31 -0800 Subject: [PATCH 073/321] Fix compilation Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 30 +++++------- cpp/src/io/orc/reader_impl_chunking.hpp | 62 ++++++++++-------------- cpp/src/io/orc/reader_impl_preprocess.cu | 7 +-- 3 files changed, 43 insertions(+), 56 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ccb6d3dc4e8..ff875e2050c 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -92,13 +92,11 @@ std::size_t gather_stream_info(std::size_t stripe_index, } if (col != -1) { - stream_info.emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_id_info{stripe_index, - level, - column_id, - stream.kind}); + stream_info.emplace_back( + stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_id_info{static_cast(stripe_index), level, column_id, stream.kind}); dst_offset += stream.length; } src_offset += stream.length; @@ -276,13 +274,12 @@ void reader::impl::global_preprocess(uint64_t skip_rows, lvl_stripe_data.resize(_selected_columns.num_levels()); lvl_stripe_sizes.resize(_selected_columns.num_levels()); - auto& read_info = _file_itm_data.data_read_info; - auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + auto& read_info = _file_itm_data.data_read_info; + auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
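// [Editor's note] What the `stream_id_map` alias used below expands to, as a
// stand-alone sketch: an unordered_map keyed by a compound stream id through
// user-provided hash/equality functors. Fields are abbreviated from the patch
// and the XOR hash is illustrative, not authoritative.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>

struct stream_id {
  uint32_t stripe_idx;
  std::size_t level;
  uint32_t orc_col_idx;
  int kind;

  struct hash {
    std::size_t operator()(stream_id const& id) const
    {
      return std::hash<uint32_t>{}(id.stripe_idx) ^ std::hash<std::size_t>{}(id.level) ^
             std::hash<uint32_t>{}(id.orc_col_idx) ^ std::hash<int>{}(id.kind);
    }
  };
  struct equal_to {
    bool operator()(stream_id const& a, stream_id const& b) const
    {
      return a.stripe_idx == b.stripe_idx && a.level == b.level &&
             a.orc_col_idx == b.orc_col_idx && a.kind == b.kind;
    }
  };
};

template <typename T>
using stream_id_map = std::unordered_map<stream_id, T, stream_id::hash, stream_id::equal_to>;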
- std::unordered_map - stream_compinfo_map; + stream_id_map stream_compinfo_map; // Logically view streams as columns _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); @@ -463,7 +460,7 @@ void reader::impl::read_data() std::vector, std::size_t>> read_tasks; auto const& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; - auto const [read_begin, read_end] = get_range(stripe_data_read_chunks, stripe_chunk); + auto const [read_begin, read_end] = get_range(stripe_data_read_chunks, stripe_chunk); for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read = read_info[read_idx]; @@ -506,8 +503,7 @@ void reader::impl::subpass_preprocess() // TODO: This is subpass // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - std::unordered_map - stream_compinfo_map; + stream_id_map stream_compinfo_map; // TODO: fix this, loop only current chunk auto const stripe_chunk = @@ -546,7 +542,7 @@ void reader::impl::subpass_preprocess() static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, info.length)); stream_compinfo_map[stream_id_info{ - info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}] = + info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] = &compinfo[compinfo.size() - 1]; #ifdef PRINT_DEBUG printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", @@ -575,7 +571,7 @@ void reader::impl::subpass_preprocess() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; - stripe_decomp_sizes[stream_id.id.stripe_idx - stripe_chunk.start_idx].size_bytes += + stripe_decomp_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes += stream_compinfo->max_uncompressed_size; #ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2d07cae3214..0dbfde47363 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -36,9 +36,9 @@ namespace cudf::io::orc::detail { struct stream_id_info { uint32_t stripe_idx; // global stripe id throughout the data source // TODO: change type below - std::size_t level; // level of the nested column - uint32_t orc_col_idx; // orc column id - StreamKind kind; // stream kind + std::size_t level; // level of the nested column + uint32_t orc_col_idx; // orc column id + StreamKind kind; // stream kind struct hash { std::size_t operator()(stream_id_info const& id) const @@ -60,13 +60,13 @@ struct stream_id_info { /** * @brief Map to lookup a value from stream id. -*/ -template + */ +template using stream_id_map = std::unordered_map; /** - * @brief Struct that store identification of an ORC streams. + * @brief Struct that store identification of an ORC stream. */ struct orc_stream_info { // TODO: remove constructor @@ -74,10 +74,7 @@ struct orc_stream_info { std::size_t dst_pos_, uint32_t length_, stream_id_info const& id_) - : offset(offset_), - dst_pos(dst_pos_), - length(length_), - id(id_) + : offset(offset_), dst_pos(dst_pos_), length(length_), id(id_) { #ifdef PRINT_DEBUG printf(" construct stripe id [%d, %d, %d, %d]\n", @@ -126,11 +123,10 @@ struct range { * @brief Struct to store file-level data that remains constant for all chunks being output. 
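 *
 * (Editor's sketch, not part of the patch, assuming the members declared just
 * below:)
 * @code
 * file_intermediate_data itm = make_file_itm_data();  // hypothetical factory
 * if (itm.has_no_data()) { return; }  // no rows or no stripes were selected
 * @endcode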
*/ struct file_intermediate_data { - int64_t rows_to_skip; + int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; - // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } @@ -146,18 +142,16 @@ struct file_intermediate_data { // This is used to initialize the stripe_data buffers. std::vector> lvl_stripe_sizes; - - // Store information to identify where to read a chunk of data from source. // Each read corresponds to one or more consecutive streams combined. struct data_read_info { // TODO: remove constructor data_read_info(uint64_t offset_, - std::size_t length_, - std::size_t dst_pos_, - std::size_t source_idx_, - std::size_t stripe_idx_, - std::size_t level_) + std::size_t length_, + std::size_t dst_pos_, + std::size_t source_idx_, + std::size_t stripe_idx_, + std::size_t level_) : offset(offset_), length(length_), dst_pos(dst_pos_), @@ -166,40 +160,35 @@ struct file_intermediate_data { level(level_) { } - uint64_t offset; // offset in data source - std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data - std::size_t length; // data length to read - std::size_t source_idx; // the data source id - std::size_t stripe_idx; // stream id TODO: processing or source stripe id? - std::size_t level; // nested level + uint64_t offset; // offset in data source + std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data + std::size_t length; // data length to read + std::size_t source_idx; // the data source id + std::size_t stripe_idx; // stream id TODO: processing or source stripe id? + std::size_t level; // nested level }; - // Identify what data to read from source. + // Identify what data to read from source. std::vector data_read_info; // For each stripe, we perform a number of read for its streams. // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. std::vector stripe_data_read_chunks; - // Store info for each ORC stream at each nested level. std::vector> lvl_stream_info; + // At each nested level, the streams for each stripe are stored consecutively in lvl_stream_info. + // This is used to identify the range of streams for each stripe from that vector. + std::vector> lvl_stripe_stream_chunks; -// At each nested level, the streams for each stripe are stored consecutively in lvl_stream_info. -// This is used to identify the range of streams for each stripe from that vector. - std::vector> lvl_stripe_stream_chunks; - - -// TODO + // TODO std::vector>> null_count_prefix_sums; // For data processing, decompression, and decoding. // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. std::vector> lvl_data_chunks; - - bool global_preprocessed{false}; }; @@ -216,7 +205,8 @@ struct chunk_read_data { std::size_t data_read_limit; // approximate maximum size (in bytes) used for store // intermediate data, or 0 for no limit - // Chunks of stripes that can be load into memory such that their data size is within a size limit. + // Chunks of stripes that can be load into memory such that their data size is within a size + // limit. 
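// [Editor's note] A minimal, self-contained sketch of how such chunks can be
// formed for the `load_stripe_chunks` member just below: greedily accumulate
// per-stripe sizes and cut a new chunk whenever the limit would be exceeded.
// The patch's `find_splits` computes the same thing over prefix sums on the
// device; this host version is illustrative only.
#include <cstddef>
#include <vector>

struct size_chunk {
  std::size_t start_idx;
  std::size_t count;
};

std::vector<size_chunk> split_by_size_limit(std::vector<std::size_t> const& stripe_sizes,
                                            std::size_t size_limit)
{
  std::vector<size_chunk> chunks;
  std::size_t begin = 0, accumulated = 0;
  for (std::size_t i = 0; i < stripe_sizes.size(); ++i) {
    // Close the current chunk if this stripe would break the limit, but never
    // emit an empty chunk: an oversized stripe becomes a chunk of its own.
    if (accumulated + stripe_sizes[i] > size_limit && i > begin) {
      chunks.push_back({begin, i - begin});
      begin       = i;
      accumulated = 0;
    }
    accumulated += stripe_sizes[i];
  }
  if (begin < stripe_sizes.size()) { chunks.push_back({begin, stripe_sizes.size() - begin}); }
  return chunks;
}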
std::vector load_stripe_chunks; std::size_t curr_load_stripe_chunk{0}; bool more_stripe_to_load() const { return curr_load_stripe_chunk < load_stripe_chunks.size(); } diff --git a/cpp/src/io/orc/reader_impl_preprocess.cu b/cpp/src/io/orc/reader_impl_preprocess.cu index 2d0d01e056b..e22cfc9a6d2 100644 --- a/cpp/src/io/orc/reader_impl_preprocess.cu +++ b/cpp/src/io/orc/reader_impl_preprocess.cu @@ -193,12 +193,13 @@ rmm::device_buffer decompress_stripe_data( // printf("line %d\n", __LINE__); // fflush(stdout); - auto const& cached_comp_info = - compinfo_map.at(stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}); + auto const& cached_comp_info = compinfo_map.at( + stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}); // printf("line %d\n", __LINE__); // fflush(stdout); // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, info.id.kind}]; + // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, + // info.id.kind}]; auto& stream_comp_info = compinfo[compinfo.size() - 1]; stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; From c75daeb0dddb3301c7acc34f9d31f5b3762b65f9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 11:25:59 -0800 Subject: [PATCH 074/321] Reformat Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/reader_impl.hpp | 29 ++++++++++++++--------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 48d110c61bd..0e3987f2cd5 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -38,8 +38,8 @@ reader::impl::impl(std::vector>&& sources, } reader::impl::impl(std::size_t output_size_limit, - std::size_t data_read_limit, - std::vector>&& sources, + std::size_t data_read_limit, + std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index d131d907fa1..2447346f2a0 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -84,12 +84,10 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); - - private: /** * @brief Perform all the necessary data preprocessing before creating an output table. - * + * * This is the proxy to call all other data preprocessing functions, which are prerequisite * for generating an output table. * @@ -104,42 +102,43 @@ class reader::impl { /** * @brief Perform a global preprocessing step that executes exactly once for the entire duration * of the reader. - * - * In this step, the metadata of all stripes in the data source is parsed, and information about - * data streams for all selected columns in alls tripes are generated. If the reader has a data + * + * In this step, the metadata of all stripes in the data source is parsed, and information about + * data streams for all selected columns in alls tripes are generated. If the reader has a data * read limit, data size of all stripes are used to determine the chunks of consecutive - * stripes for reading each time using the `read_data()` step. This is to ensure that loading + * stripes for reading each time using the `read_data()` step. 
This is to ensure that loading * these stripes will not exceed a fixed portion the data read limit. - */ + */ void global_preprocess(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes); /** * @brief Read stripes from the input source and store the data in the internal buffers. - * + * * If there is a data read limit, only a chunk of stripes are read at a time such that * their total data size does not exceed a fixed portion of the limit. Then, the data is * probed to determine the uncompressed sizes for these loaded stripes, which are in turn - * used to determine a subset of stripes to decompress and decode in the next step + * used to determine a subset of stripes to decompress and decode in the next step * `decompress_and_decode()`. - * This is to ensure that loading data together with decompression and decoding will not exceed + * This is to ensure that loading data together with decompression and decoding will not exceed * the data read limit. */ void read_data(); /** * TODO: merge with read data. - */ + */ void subpass_preprocess(); /** * @brief Decompress and decode the data in the internal buffers, and store the result into * an internal table. - * + * * If there is a data read limit, only a chunk of stripes are decompressed and decoded at a time. - * Then, the result is stored in an internal table, and sizes of its rows are computed - * to determine slices of rows to return as the output table in the final step `make_output_chunk`. + * Then, the result is stored in an internal table, and sizes of its rows are computed + * to determine slices of rows to return as the output table in the final step + * `make_output_chunk`. */ void decompress_and_decode(); From 3ae55fad33b8a7aba4a7cf3c8bf518cf8bee7756 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 14:44:52 -0800 Subject: [PATCH 075/321] Fix style Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index a97ec081570..4e083d714ee 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -158,10 +158,10 @@ class reader::impl { // Reader configs struct { - data_type timestamp_type; // Override output timestamp resolution - bool use_index; // Enable or disable attempt to use row index for parsing - bool use_np_dtypes; // Enable or disable the conversion to numpy-compatible dtypes - std::vector decimal128_columns; // Control decimals conversion + data_type timestamp_type; // Override output timestamp resolution + bool use_index; // Enable or disable attempt to use row index for parsing + bool use_np_dtypes; // Enable or disable the conversion to numpy-compatible dtypes + std::vector decimal128_columns; // Control decimals conversion } const _config; // Intermediate data for internal processing. 
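[Editor's note between patches: the commits above and below keep reshaping
reader::impl::prepare_data(). The control flow they converge on, written as a
hedged stand-alone sketch (reader_pass and its members are hypothetical
stand-ins for reader::impl; only the call order mirrors the patches):]

struct reader_pass {
  int stripe_chunks_left = 2;      // planned once by global_preprocess()

  void global_preprocess() {}      // parse metadata, plan chunks of stripes
  bool more_stripe_to_load() { return stripe_chunks_left-- > 0; }
  void load_data() {}              // read one chunk of stripes from the source
  void decompress_and_decode() {}  // currently once; later once per decode chunk
};

void run_pass(reader_pass& r)
{
  r.global_preprocess();
  while (r.more_stripe_to_load()) {
    r.load_data();
  }
  r.decompress_and_decode();
}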
From 0188cc760221224a13d1735bf32aa306a2925847 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 15:17:45 -0800 Subject: [PATCH 076/321] Move code around Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 82 ++++++++++++++------------ cpp/src/io/orc/reader_impl_chunking.cu | 5 ++ 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index a20456458bf..8384a9937bc 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -695,6 +695,13 @@ void reader::impl::prepare_data(uint64_t skip_rows, read_data(); } + decompress_and_decode(); +} + +// TODO: this should be called per chunk of stripes. +void reader::impl::decompress_and_decode() +{ + if (_file_itm_data.has_no_data()) { return; } auto const rows_to_skip = _file_itm_data.rows_to_skip; auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; @@ -988,45 +995,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, } // end loop level } -reader::impl::impl(std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : reader::impl::impl(0UL, 0UL, std::move(sources), options, stream, mr) -{ -} - -reader::impl::impl(std::size_t output_size_limit, - std::size_t data_read_limit, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _stream(stream), - _mr(mr), - _config{options.get_timestamp_type(), - options.is_enabled_use_index(), - options.is_enabled_use_np_dtypes(), - options.get_decimal128_columns()}, - _col_meta{std::make_unique()}, - _sources(std::move(sources)), - _metadata{_sources, stream}, - _selected_columns{_metadata.select_columns(options.get_columns())}, - _chunk_read_data{output_size_limit, data_read_limit} -{ -} - -table_with_metadata reader::impl::read(uint64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes) -{ - prepare_data(skip_rows, num_rows_opt, stripes); - return make_output_chunk(); -} - -// TODO: move code here -void reader::impl::decompress_and_decode() {} - table_with_metadata reader::impl::make_output_chunk() { // There is no columns in the table. 
@@ -1118,6 +1086,42 @@ table_metadata reader::impl::make_output_metadata() return out_metadata; } +reader::impl::impl(std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader::impl::impl(0UL, 0UL, std::move(sources), options, stream, mr) +{ +} + +reader::impl::impl(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _stream(stream), + _mr(mr), + _config{options.get_timestamp_type(), + options.is_enabled_use_index(), + options.is_enabled_use_np_dtypes(), + options.get_decimal128_columns()}, + _col_meta{std::make_unique()}, + _sources(std::move(sources)), + _metadata{_sources, stream}, + _selected_columns{_metadata.select_columns(options.get_columns())}, + _chunk_read_data{output_size_limit, data_read_limit} +{ +} + +table_with_metadata reader::impl::read(uint64_t skip_rows, + std::optional const& num_rows_opt, + std::vector> const& stripes) +{ + prepare_data(skip_rows, num_rows_opt, stripes); + return make_output_chunk(); +} + // Forward to implementation reader::reader(std::vector>&& sources, orc_reader_options const& options, diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ef17adc6e64..2bfedea7506 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -337,6 +337,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, stripe_data_read_chunks.resize(num_stripes); lvl_stripe_stream_chunks.resize(_selected_columns.num_levels()); + // TODO: move this + auto& lvl_chunks = _file_itm_data.lvl_data_chunks; + lvl_chunks.resize(_selected_columns.num_levels()); + _out_buffers.resize(_selected_columns.num_levels()); + // TODO: Check if these data depends on pass and subpass, instead of global pass. // Prepare data. // Iterates through levels of nested columns, child column will be one level down From 25d810a3e0a1c897d74635983b46f7b5275a64f7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 9 Feb 2024 16:11:03 +0700 Subject: [PATCH 077/321] Rename function Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/reader_impl.hpp | 6 +++--- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 8384a9937bc..718029da652 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -692,7 +692,7 @@ void reader::impl::prepare_data(uint64_t skip_rows, // TODO: fix this, should be called once _chunk_read_data.curr_load_stripe_chunk = 0; while (_chunk_read_data.more_stripe_to_load()) { - read_data(); + load_data(); } decompress_and_decode(); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 4e083d714ee..67454eb0378 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -106,7 +106,7 @@ class reader::impl { * In this step, the metadata of all stripes in the data source is parsed, and information about * data streams for all selected columns in alls tripes are generated. If the reader has a data * read limit, data size of all stripes are used to determine the chunks of consecutive - * stripes for reading each time using the `read_data()` step. This is to ensure that loading + * stripes for reading each time using the `load_data()` step. 
This is to ensure that loading * these stripes will not exceed a fixed portion the data read limit. */ void global_preprocess(uint64_t skip_rows, @@ -114,7 +114,7 @@ class reader::impl { std::vector> const& stripes); /** - * @brief Read stripes from the input source and store the data in the internal buffers. + * @brief Load stripes from the input source and store the data in the internal buffers. * * If there is a data read limit, only a chunk of stripes are read at a time such that * their total data size does not exceed a fixed portion of the limit. Then, the data is @@ -124,7 +124,7 @@ class reader::impl { * This is to ensure that loading data together with decompression and decoding will not exceed * the data read limit. */ - void read_data(); + void load_data(); /** * @brief Decompress and decode the data in the internal buffers, and store the result into diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2bfedea7506..d3a04b7cec5 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -487,7 +487,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // Load each chunk from `load_stripe_chunks`. -void reader::impl::read_data() +void reader::impl::load_data() { if (_file_itm_data.has_no_data()) { return; } From 21b2a9a29a85cddeb736ed94762cd9a52a6171d3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 15:27:10 -0800 Subject: [PATCH 078/321] Remove redundant code # Conflicts: # cpp/src/io/orc/reader_impl.cu --- cpp/src/io/orc/reader_impl.cu | 1 - cpp/src/io/orc/reader_impl_chunking.cu | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 718029da652..ebb6cefb709 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -687,7 +687,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, global_preprocess(skip_rows, num_rows_opt, stripes); - if (_file_itm_data.has_no_data()) { return; } // TODO: fix this, should be called once _chunk_read_data.curr_load_stripe_chunk = 0; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d3a04b7cec5..5a467f45768 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -459,9 +459,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } // DEBUG only + // TODO: use 0.3 constant _chunk_read_data.data_read_limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; + _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); From a5622c6ac36aff9b2eabce880449f3e7cb3a71c9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 10 Feb 2024 15:14:45 +0700 Subject: [PATCH 079/321] Add comments --- cpp/src/io/orc/reader_impl.cu | 1 - cpp/src/io/orc/reader_impl_chunking.cu | 30 ++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index ebb6cefb709..84eec8526af 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -687,7 +687,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, global_preprocess(skip_rows, num_rows_opt, stripes); - // TODO: fix this, should be called once _chunk_read_data.curr_load_stripe_chunk = 0; while (_chunk_read_data.more_stripe_to_load()) { diff --git a/cpp/src/io/orc/reader_impl_chunking.cu 
b/cpp/src/io/orc/reader_impl_chunking.cu index 5a467f45768..1f56cb0b4cd 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -152,11 +152,17 @@ std::size_t gather_stream_info_and_column_desc( namespace { +/** + * @brief Struct to accummulate sizes of chunks of some data such as stripe or rows. + */ struct cumulative_size { int64_t count{0}; std::size_t size_bytes{0}; }; +/** + * @brief Functor to sum up cummulative sizes. + */ struct cumulative_size_sum { __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const { @@ -165,6 +171,10 @@ struct cumulative_size_sum { }; #if 1 +/** + * @brief Find the splits of the input data such that each split has cummulative size less than a + * given `size_limit`. + */ std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit) @@ -225,6 +235,15 @@ std::vector find_splits(host_span sizes, #endif #ifdef PRINT_DEBUG +/** + * @brief Verify the splits, checking if they are correct. + * + * We need to verify that: + * 1. All chunk must have count > 0 + * 2. Chunks are continuous. + * 3. sum(all sizes in a chunk) < size_limit + * 4. sum(all counts in all chunks) == total_count. + */ void verify_splits(host_span splits, host_span sizes, size_type total_count, @@ -271,11 +290,11 @@ void verify_splits(host_span splits, #endif /** - * @brief + * @brief Find range of the data span by a given chunk of chunks. * - * @param input_chunks - * @param selected_chunks - * @return + * @param input_chunks The list of all data chunks + * @param selected_chunks A chunk of chunks in the input_chunks + * @return The range of data span by the selected chunk of given chunks */ std::pair get_range(std::vector const& input_chunks, chunk const& selected_chunks) @@ -284,7 +303,7 @@ std::pair get_range(std::vector const& input_chunks, auto const chunk_begin = selected_chunks.start_idx; auto const chunk_end = selected_chunks.start_idx + selected_chunks.count; - // The first and last chunk, according to selected_chunk + // The first and last chunk, according to selected_chunk. auto const& first_chunk = input_chunks[chunk_begin]; auto const& last_chunk = input_chunks[chunk_end - 1]; @@ -463,7 +482,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows, _chunk_read_data.data_read_limit = total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3; - _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); From b3be9650c5abfa49e6219869cbedff3711b22d68 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 18:46:03 -0800 Subject: [PATCH 080/321] Fix spell Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 1f56cb0b4cd..4acfe998c57 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -161,7 +161,7 @@ struct cumulative_size { }; /** - * @brief Functor to sum up cummulative sizes. + * @brief Functor to sum up cumulative sizes. 
*/ struct cumulative_size_sum { __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const From 782b99a123ab482e2b79bbf367094cc2db09fe1e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 19:14:55 -0800 Subject: [PATCH 081/321] Fix spell Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 4acfe998c57..d8182102a2b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -172,7 +172,7 @@ struct cumulative_size_sum { #if 1 /** - * @brief Find the splits of the input data such that each split has cummulative size less than a + * @brief Find the splits of the input data such that each split has cumulative size less than a * given `size_limit`. */ std::vector find_splits(host_span sizes, From 97717f9aa7ed0a1992fad4c36f68183ff32211a0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 19:50:33 -0800 Subject: [PATCH 082/321] Move code around Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 49 +++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 84eec8526af..95a586e49c0 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -673,29 +673,6 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } // namespace -void reader::impl::prepare_data(uint64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes) -{ - // Selected columns at different levels of nesting are stored in different elements - // of `selected_columns`; thus, size == 1 means no nested columns - CUDF_EXPECTS(skip_rows == 0 or _selected_columns.num_levels() == 1, - "skip_rows is not supported by nested columns"); - - // There are no columns in the table - if (_selected_columns.num_levels() == 0) { return; } - - global_preprocess(skip_rows, num_rows_opt, stripes); - - // TODO: fix this, should be called once - _chunk_read_data.curr_load_stripe_chunk = 0; - while (_chunk_read_data.more_stripe_to_load()) { - load_data(); - } - - decompress_and_decode(); -} - // TODO: this should be called per chunk of stripes. void reader::impl::decompress_and_decode() { @@ -993,6 +970,32 @@ void reader::impl::decompress_and_decode() } // end loop level } +void reader::impl::prepare_data(uint64_t skip_rows, + std::optional const& num_rows_opt, + std::vector> const& stripes) +{ + // Selected columns at different levels of nesting are stored in different elements + // of `selected_columns`; thus, size == 1 means no nested columns + CUDF_EXPECTS(skip_rows == 0 or _selected_columns.num_levels() == 1, + "skip_rows is not supported by nested columns"); + + // There are no columns in the table. + if (_selected_columns.num_levels() == 0) { return; } + + // Perform a global preprocessing step for the entire input sources. + global_preprocess(skip_rows, num_rows_opt, stripes); + + // TODO: fix this, should be called once + // TODO: only load data if needed. + _chunk_read_data.curr_load_stripe_chunk = 0; + while (_chunk_read_data.more_stripe_to_load()) { + load_data(); + } + + // TODO: only do if needed. + decompress_and_decode(); +} + table_with_metadata reader::impl::make_output_chunk() { // There is no columns in the table. 
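[Editor's note: the `cumulative_size_sum` functor above pairs with an inclusive
scan that turns per-stripe sizes into running totals for find_splits to search.
Hedged host-side sketch of that pattern; the patch uses a thrust scan on the
device, and `cum_size`/`running_totals` here are illustrative stand-ins:]

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

struct cum_size {
  int64_t count{0};
  std::size_t size_bytes{0};
};

std::vector<cum_size> running_totals(std::vector<cum_size> sizes)
{
  std::inclusive_scan(sizes.begin(), sizes.end(), sizes.begin(),
                      [](cum_size const& a, cum_size const& b) {
                        return cum_size{a.count + b.count, a.size_bytes + b.size_bytes};
                      });
  return sizes;  // sizes[i] now totals stripes [0, i]
}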
From 6fbb424707b76388e244d8ba6779604b773e1001 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 20 Feb 2024 20:58:39 -0800 Subject: [PATCH 083/321] Remove unused var Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d8182102a2b..d8569ccb2c0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -344,9 +344,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows, auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; - // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - stream_id_map stream_compinfo_map; - // Logically view streams as columns _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); From 99f0374539dc191a13d67b6dfb763c5127e9a833 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 21 Feb 2024 12:50:34 -0800 Subject: [PATCH 084/321] Misc Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 1 - cpp/src/io/orc/reader_impl.hpp | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 95a586e49c0..be79c4901e2 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -987,7 +987,6 @@ void reader::impl::prepare_data(uint64_t skip_rows, // TODO: fix this, should be called once // TODO: only load data if needed. - _chunk_read_data.curr_load_stripe_chunk = 0; while (_chunk_read_data.more_stripe_to_load()) { load_data(); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 67454eb0378..5ad0e49a889 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -103,6 +103,8 @@ class reader::impl { * @brief Perform a global preprocessing step that executes exactly once for the entire duration * of the reader. * + * TODO: rewrite, not use "ensure". + * * In this step, the metadata of all stripes in the data source is parsed, and information about * data streams for all selected columns in alls tripes are generated. 
If the reader has a data * read limit, data size of all stripes are used to determine the chunks of consecutive From f10467c8aa8bb334489f01220b240b90a6de0137 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 21 Feb 2024 14:20:25 -0800 Subject: [PATCH 085/321] Implement chunking interface Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/io/detail/orc.hpp | 71 +++++++++++++++++++++++++++- cpp/include/cudf/io/orc.hpp | 74 ++++++++++++++++++++++++++++++ cpp/src/io/functions.cpp | 42 +++++++++++++++++ cpp/src/io/orc/reader.cu | 56 ++++++++++++++++++++++ cpp/src/io/orc/reader_impl.cu | 28 ++++++----- cpp/src/io/orc/reader_impl.hpp | 21 +++++---- 7 files changed, 269 insertions(+), 24 deletions(-) create mode 100644 cpp/src/io/orc/reader.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bc836eb393a..f8d794e3334 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -386,6 +386,7 @@ add_library( src/io/orc/aggregate_orc_metadata.cpp src/io/orc/dict_enc.cu src/io/orc/orc.cpp + src/io/orc/reader.cu src/io/orc/reader_impl.cu src/io/orc/reader_impl_chunking.cu src/io/orc/reader_impl_helpers.cpp diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index a0bf8b24b80..012b06d338a 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -41,10 +41,15 @@ namespace orc::detail { * @brief Class to read ORC dataset data into columns. */ class reader { - private: + protected: class impl; std::unique_ptr _impl; + /** + * @brief Default constructor, needed for subclassing. + */ + reader(); + public: /** * @brief Constructor from an array of datasources @@ -62,7 +67,7 @@ class reader { /** * @brief Destructor explicitly declared to avoid inlining in header */ - ~reader(); + virtual ~reader(); /** * @brief Reads the entire dataset. @@ -73,6 +78,67 @@ class reader { table_with_metadata read(orc_reader_options const& options); }; +/** + * @brief The reader class that supports iterative reading of a given file. + * + * This class intentionally subclasses the `reader` class with private inheritance to hide the + * `reader::read()` API. As such, only chunked reading APIs are supported. + */ +class chunked_reader : private reader { + public: + /** + * @brief Constructor from size limits and an array of data sources with reader options. + * + * The typical usage should be similar to this: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); + * + * ``` + * + * If `output_size_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the + * whole file and return a table containing all rows. 
+ * + * TODO: data read limit + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param data_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit + * @param sources Input `datasource` objects to read the dataset from + * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ + explicit chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + + /** + * @brief Destructor explicitly-declared to avoid inlined in header. + * + * Since the declaration of the internal `_impl` object does not exist in this header, this + * destructor needs to be defined in a separate source file which can access to that object's + * declaration. + */ + ~chunked_reader(); + + /** + * @copydoc cudf::io::chunked_orc_reader::has_next + */ + [[nodiscard]] bool has_next() const; + + /** + * @copydoc cudf::io::chunked_orc_reader::read_chunk + */ + [[nodiscard]] table_with_metadata read_chunk() const; +}; + /** * @brief Class to write ORC dataset data into columns. */ @@ -133,5 +199,6 @@ class writer { */ void skip_close(); }; + } // namespace orc::detail } // namespace cudf::io diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index a3f76817f8a..d512f4a6cc4 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -404,6 +404,80 @@ table_with_metadata read_orc( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief The chunked orc reader class to read ORC file iteratively in to a series of + * tables, chunk by chunk. + * + * This class is designed to address the reading issue when reading very large ORC files such + * that sizes of their columns exceed the limit that can be stored in cudf columns. By reading the + * file content by chunks using this class, each chunk is guaranteed to have its size stay within + * the given limit. + */ +class chunked_orc_reader { + public: + /** + * @brief Default constructor, this should never be used. + * + * This is added just to satisfy cython. + */ + chunked_orc_reader() = default; + + /** + * @brief Constructor for chunked reader. + * + * This constructor requires the same `orc_reader_option` parameter as in + * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the + * output table for each reading. 
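 *
 * (Editor's illustration of the intended reading loop, not text from the
 * patch; it assumes the interface declared below:)
 * ```
 * chunked_orc_reader reader(output_size_limit, data_read_limit, options);
 * while (reader.has_next()) {
 *   auto chunk = reader.read_chunk();
 *   // consume chunk.tbl and chunk.metadata
 * }
 * ```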
+ * + * TODO: data read limit + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param data_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit + * @param options The options used to read Parquet file + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ + chunked_orc_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Destructor, destroying the internal reader instance. + * + * Since the declaration of the internal `reader` object does not exist in this header, this + * destructor needs to be defined in a separate source file which can access to that object's + * declaration. + */ + ~chunked_orc_reader(); + + /** + * @brief Check if there is any data in the given file has not yet read. + * + * @return A boolean value indicating if there is any data left to read + */ + [[nodiscard]] bool has_next() const; + + /** + * @brief Read a chunk of rows in the given ORC file. + * + * The sequence of returned tables, if concatenated by their order, guarantees to form a complete + * dataset as reading the entire given file at once. + * + * An empty table will be returned if the given file is empty, or all the data in the file has + * been read and returned by the previous calls. + * + * @return An output `cudf::table` along with its metadata + */ + [[nodiscard]] table_with_metadata read_chunk() const; + + private: + std::unique_ptr reader; +}; + /** @} */ // end of group /** * @addtogroup io_writers diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 42f2fd02d52..fc29201ce19 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -448,6 +448,48 @@ void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream) } } +/** + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader + */ +chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader{std::make_unique(output_size_limit, + data_read_limit, + make_datasources(options.get_source()), + options, + stream, + mr)} +{ +} + +/** + * @copydoc cudf::io::chunked_orc_reader::~chunked_orc_reader + */ +chunked_orc_reader::~chunked_orc_reader() = default; + +/** + * @copydoc cudf::io::chunked_orc_reader::has_next + */ +bool chunked_orc_reader::has_next() const +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly."); + return reader->has_next(); +} + +/** + * @copydoc cudf::io::chunked_orc_reader::read_chunk + */ +table_with_metadata chunked_orc_reader::read_chunk() const +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly."); + return reader->read_chunk(); +} + /** * @copydoc cudf::io::orc_chunked_writer::orc_chunked_writer */ diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu new file mode 100644 index 00000000000..325986d7aef --- /dev/null +++ b/cpp/src/io/orc/reader.cu @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "reader_impl.hpp" +#include "reader_impl_helpers.hpp" + +namespace cudf::io::orc::detail { + +// Constructor and destructor are defined within this translation unit. +reader::reader() = default; +reader::~reader() = default; + +reader::reader(std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _impl{std::make_unique(std::move(sources), options, stream, mr)} +{ +} + +table_with_metadata reader::read(orc_reader_options const& options) +{ + return _impl->read(options.get_skip_rows(), options.get_num_rows(), options.get_stripes()); +} + +chunked_reader::chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + _impl = std::make_unique( + output_size_limit, data_read_limit, std::move(sources), options, stream, mr); +} + +chunked_reader::~chunked_reader() = default; + +bool chunked_reader::has_next() const { return _impl->has_next(); } + +table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); } + +} // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index be79c4901e2..c7421c5e41f 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1122,22 +1122,26 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, return make_output_chunk(); } -// Forward to implementation -reader::reader(std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _impl{std::make_unique(std::move(sources), options, stream, mr)} +bool reader::impl::has_next() { + prepare_data(0 /*skip_rows*/, std::nullopt /*num_rows, `std::nullopt` means unlimited*/, {}); + // return _chunk_read_info.current_chunk_idx < _chunk_read_info.chunks.size(); + return true; } -// Destructor within this translation unit -reader::~reader() = default; - -// Forward to implementation -table_with_metadata reader::read(orc_reader_options const& options) +table_with_metadata reader::impl::read_chunk() { - return _impl->read(options.get_skip_rows(), options.get_num_rows(), options.get_stripes()); + // Reset the output buffers to their original states (right after reader construction). + // Don't need to do it if we read the file all at once. 
+ // if (_chunk_read_info.chunk_size_limit > 0) { + // _output_buffers.resize(0); + // for (auto const& buff : _output_buffers_template) { + // _output_buffers.emplace_back(column_buffer::empty_like(buff)); + // } + // } + + prepare_data(0 /*skip_rows*/, std::nullopt /*num_rows, `std::nullopt` means unlimited*/, {}); + return make_output_chunk(); } } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 5ad0e49a889..d4e38304ddd 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -54,16 +54,7 @@ class reader::impl { rmm::mr::device_memory_resource* mr); /** - * @brief Constructor from a dataset source with reader options. - * - * @param output_size_limit Limit on total number of bytes to be returned per read, - * or `0` if there is no limit - * @param data_read_limit Limit on memory usage for the purposes of decompression and processing - * of input, or `0` if there is no limit - * @param sources Dataset sources - * @param options Settings for controlling reading behavior - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation + * @copydoc cudf::io::orc::detail::chunked_reader */ explicit impl(std::size_t output_size_limit, std::size_t data_read_limit, @@ -84,6 +75,16 @@ class reader::impl { std::optional const& num_rows_opt, std::vector> const& stripes); + /** + * @copydoc cudf::io::chunked_orc_reader::has_next + */ + bool has_next(); + + /** + * @copydoc cudf::io::chunked_orc_reader::read_chunk + */ + table_with_metadata read_chunk(); + private: /** * @brief Perform all the necessary data preprocessing before creating an output table. From 28e328c838c1dd56f3411c3684f6845899c06757 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 21 Feb 2024 18:25:03 -0800 Subject: [PATCH 086/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 2 +- cpp/src/io/orc/reader.cu | 4 +-- cpp/src/io/orc/reader_impl.cu | 32 +++++------------------ cpp/src/io/orc/reader_impl.hpp | 10 +++---- cpp/src/io/orc/reader_impl_chunking.cu | 6 ++--- cpp/src/io/orc/reader_impl_helpers.hpp | 4 +-- 6 files changed, 20 insertions(+), 38 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 8412a767d3f..620294a1e47 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "aggregate_orc_metadata.hpp" +#include "io/orc/aggregate_orc_metadata.hpp" #include "io/utilities/row_selection.hpp" diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 325986d7aef..855a96c9ae3 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "reader_impl.hpp" -#include "reader_impl_helpers.hpp" +#include "io/orc/reader_impl.hpp" +#include "io/orc/reader_impl_helpers.hpp" namespace cudf::io::orc::detail { diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 59ab3f52eaa..ec517f93e23 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -18,10 +18,10 @@ #include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" +#include "io/orc/reader_impl.hpp" +#include "io/orc/reader_impl_chunking.hpp" +#include "io/orc/reader_impl_helpers.hpp" #include "io/utilities/config_utils.hpp" -#include "reader_impl.hpp" -#include "reader_impl_chunking.hpp" -#include "reader_impl_helpers.hpp" #include #include @@ -981,16 +981,8 @@ void reader::impl::prepare_data(uint64_t skip_rows, // There are no columns in the table. if (_selected_columns.num_levels() == 0) { return; } - // Perform a global preprocessing step for the entire input sources. global_preprocess(skip_rows, num_rows_opt, stripes); - - // TODO: fix this, should be called once - // TODO: only load data if needed. - while (_chunk_read_data.more_stripe_to_load()) { - load_data(); - } - - // TODO: only do if needed. + load_data(); decompress_and_decode(); } @@ -1123,23 +1115,13 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, bool reader::impl::has_next() { - prepare_data(0 /*skip_rows*/, std::nullopt /*num_rows, `std::nullopt` means unlimited*/, {}); - // return _chunk_read_info.current_chunk_idx < _chunk_read_info.chunks.size(); - return true; + prepare_data(); + return _chunk_read_data.has_next(); } table_with_metadata reader::impl::read_chunk() { - // Reset the output buffers to their original states (right after reader construction). - // Don't need to do it if we read the file all at once. 
- // if (_chunk_read_info.chunk_size_limit > 0) { - // _output_buffers.resize(0); - // for (auto const& buff : _output_buffers_template) { - // _output_buffers.emplace_back(column_buffer::empty_like(buff)); - // } - // } - - prepare_data(0 /*skip_rows*/, std::nullopt /*num_rows, `std::nullopt` means unlimited*/, {}); + prepare_data(); return make_output_chunk(); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 6996e366c55..4da73f3a08f 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include "aggregate_orc_metadata.hpp" -#include "reader_impl_chunking.hpp" +#include "io/orc/aggregate_orc_metadata.hpp" +#include "io/orc/reader_impl_chunking.hpp" #include #include @@ -96,9 +96,9 @@ class reader::impl { * @param num_rows_opt Optional number of rows to read, or `std::nullopt` to read all rows * @param stripes Indices of individual stripes to load if non-empty */ - void prepare_data(uint64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes); + void prepare_data(uint64_t skip_rows = 0, + std::optional const& num_rows_opt = std::nullopt, + std::vector> const& stripes = {}); /** * @brief Perform a global preprocessing step that executes exactly once for the entire duration diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 64cf668c508..4330c62c751 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -18,10 +18,10 @@ #include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" +#include "io/orc/reader_impl.hpp" +#include "io/orc/reader_impl_chunking.hpp" +#include "io/orc/reader_impl_helpers.hpp" #include "io/utilities/config_utils.hpp" -#include "reader_impl.hpp" -#include "reader_impl_chunking.hpp" -#include "reader_impl_helpers.hpp" #include #include diff --git a/cpp/src/io/orc/reader_impl_helpers.hpp b/cpp/src/io/orc/reader_impl_helpers.hpp index 48742b5fc8c..6f83b567710 100644 --- a/cpp/src/io/orc/reader_impl_helpers.hpp +++ b/cpp/src/io/orc/reader_impl_helpers.hpp @@ -16,9 +16,9 @@ #pragma once -#include "aggregate_orc_metadata.hpp" +#include "io/orc/aggregate_orc_metadata.hpp" +#include "io/orc/orc.hpp" #include "io/utilities/column_buffer.hpp" -#include "orc.hpp" #include From 0effdb3c0e0321c94dd20ff757e74b2c8bf9e5ae Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 21 Feb 2024 21:07:44 -0800 Subject: [PATCH 087/321] Add `front` and `back` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 7 +++++-- cpp/src/io/orc/reader_impl_chunking.cu | 14 ++++++-------- cpp/src/io/utilities/hostdevice_vector.hpp | 13 +++++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index ec517f93e23..1c5191624d0 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -112,7 +112,7 @@ rmm::device_buffer decompress_stripe_data( // auto const& cached_comp_info = // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, // info.id.kind}]; - auto& stream_comp_info = compinfo[compinfo.size() - 1]; + auto& stream_comp_info = compinfo.back(); stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; @@ -982,7 +982,10 @@ void 
reader::impl::prepare_data(uint64_t skip_rows,
   if (_selected_columns.num_levels() == 0) { return; }
 
   global_preprocess(skip_rows, num_rows_opt, stripes);
-  load_data();
+  // load_data();
+  while (_chunk_read_data.more_stripe_to_load()) {
+    load_data();
+  }
   decompress_and_decode();
 }
 
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 4330c62c751..cdd060aca4e 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -282,7 +282,7 @@ void verify_splits(host_span splits,
     }
     cur_cumulative_size = sizes[split.start_idx + split.count - 1].size_bytes;
   }
-  CUDF_EXPECTS(last_split.start_idx + last_split.count == sizes[sizes.size() - 1].count,
+  CUDF_EXPECTS(last_split.start_idx + last_split.count == sizes.back().count,
                "Invalid split start_idx.");
   CUDF_EXPECTS(count == total_count, "Invalid total count.");
 }
@@ -324,7 +324,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
   // TODO: move this to end of func.
   _file_itm_data.global_preprocessed = true;
 
-  // Select only stripes required (aka row groups)
+  // Load stripes' metadata.
   std::tie(
     _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) =
     _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream);
@@ -347,7 +347,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
   _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels());
 
   // Get the total number of stripes across all input files.
-  std::size_t num_stripes = selected_stripes.size();
+  auto const num_stripes = selected_stripes.size();
 
   stripe_data_read_chunks.resize(num_stripes);
   lvl_stripe_stream_chunks.resize(_selected_columns.num_levels());
@@ -475,8 +475,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
 
   // DEBUG only
   // TODO: use 0.3 constant
-  _chunk_read_data.data_read_limit =
-    total_stripe_sizes[total_stripe_sizes.size() - 1].size_bytes / 3;
+  _chunk_read_data.data_read_limit = total_stripe_sizes.back().size_bytes / 3;
 
   _chunk_read_data.load_stripe_chunks =
     find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit);
@@ -603,8 +602,7 @@ void reader::impl::load_data()
       static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos,
       info.length));
     stream_compinfo_map[stream_id_info{
-      info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] =
-      &compinfo[compinfo.size() - 1];
+      info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] = &compinfo.back();
 #ifdef PRINT_DEBUG
     printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n",
            (int)info.id.stripe_idx,
@@ -684,7 +682,7 @@ void reader::impl::load_data()
 
   // DEBUG only
   // _chunk_read_data.data_read_limit =
-  //   stripe_decompression_sizes[stripe_decompression_sizes.size() - 1].size_bytes / 3;
+  //   stripe_decompression_sizes.back().size_bytes / 3;
   _chunk_read_data.decode_stripe_chunks =
     find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit);
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index 3cd70801cdf..39059c665dc 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -96,6 +96,19 @@ class hostdevice_vector {
   [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }
   [[nodiscard]] bool empty() const noexcept { return size() == 0; }
 
+  [[nodiscard]] T& front()
+  {
+    CUDF_EXPECTS(size() > 0, "Cannot access `front()` element of an empty vector.");
+    return
host_data[0];
+  }
+  [[nodiscard]] T& back()
+  {
+    CUDF_EXPECTS(size() > 0, "Cannot access `back()` element of an empty vector.");
+    return host_data[size() - 1];
+  }
+  [[nodiscard]] T const& front() const { return const_cast<hostdevice_vector&>(*this).front(); }
+  [[nodiscard]] T const& back() const { return const_cast<hostdevice_vector&>(*this).back(); }
+
   [[nodiscard]] T& operator[](size_t i) { return host_data[i]; }
   [[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }
 
From bbb6b47ec0c488b55e0428b7ee2637d569f073df Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Wed, 21 Feb 2024 21:15:15 -0800
Subject: [PATCH 088/321] Rename variable

Signed-off-by: Nghia Truong 
---
 cpp/src/io/orc/reader_impl_chunking.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 2b172011259..4ab5b6bd427 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -143,14 +143,14 @@ struct file_intermediate_data {
 
   // Store information to identify where to read a chunk of data from source.
   // Each read corresponds to one or more consecutive streams combined.
-  struct data_read_info {
+  struct stream_data_read_info {
     // TODO: remove constructor
-    data_read_info(uint64_t offset_,
-                   std::size_t length_,
-                   std::size_t dst_pos_,
-                   std::size_t source_idx_,
-                   std::size_t stripe_idx_,
-                   std::size_t level_)
+    stream_data_read_info(uint64_t offset_,
+                          std::size_t length_,
+                          std::size_t dst_pos_,
+                          std::size_t source_idx_,
+                          std::size_t stripe_idx_,
+                          std::size_t level_)
       : offset(offset_),
         length(length_),
         dst_pos(dst_pos_),
@@ -168,7 +168,7 @@ struct file_intermediate_data {
   };
 
   // Identify what data to read from source.
-  std::vector data_read_info;
+  std::vector data_read_info;
 
   // For each stripe, we perform a number of reads for its streams.
   // Those reads are identified by a chunk of consecutive read info, stored in data_read_info.
From cce2d34f5a79f2862d764b71467eb51d5f58203d Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Wed, 21 Feb 2024 22:20:52 -0800
Subject: [PATCH 089/321] Misc

Signed-off-by: Nghia Truong 
---
 cpp/src/io/orc/reader_impl.cu           | 2 +-
 cpp/src/io/orc/reader_impl_chunking.cu  | 4 +---
 cpp/src/io/orc/reader_impl_chunking.hpp | 3 +++
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 1c5191624d0..bbec21c4910 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -866,7 +866,7 @@ void reader::impl::decompress_and_decode()
       num_rowgroups += stripe_num_rowgroups;
 
       stripe_idx++;
-    }
+    }  // for (stripe : selected_stripes)
 
     if (stripe_data.empty()) { continue; }
 
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index cdd060aca4e..3c354bd0f7a 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -320,8 +320,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
                                      std::vector> const& stripes)
 {
   if (_file_itm_data.global_preprocessed) { return; }
-
-  // TODO: move this to end of func.
   _file_itm_data.global_preprocessed = true;
 
   // Load stripes' metadata.
@@ -455,7 +453,6 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
 
   // Load all chunks if there is no read limit.
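+  // When a nonzero limit is set instead, a sketch of the policy applied by the
+  // `find_splits` call further below (assuming `total_stripe_sizes` holds
+  // cumulative per-stripe sizes): start a new chunk whenever the bytes
+  // accumulated since the previous cut would exceed `data_read_limit`. For
+  // example, cumulative sizes {100, 250, 400} with a 300-byte limit produce
+  // the chunks {start_idx = 0, count = 2} and {start_idx = 2, count = 1}.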
if (_chunk_read_data.data_read_limit == 0) { _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; - // TODO: DEBUG only // return; } @@ -520,6 +517,7 @@ void reader::impl::load_data() auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; // Prepare the buffer to read raw data onto. + // TODO: clear all old buffer. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stripe_data = lvl_stripe_data[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2b172011259..4ab5b6bd427 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -129,6 +129,9 @@ struct file_intermediate_data { // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } + // TODO: remove + std::size_t num_stripes() const { return selected_stripes.size(); } + // Store the compression information for each data stream. stream_id_map compinfo_map; From e9430e8adf2b60eee1cf985fd2a5d96038527276 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 22 Feb 2024 14:13:25 -0800 Subject: [PATCH 090/321] Testing multiple decoding step Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 77 +- cpp/src/io/orc/reader_impl_chunking.cu | 13 +- cpp/src/io/orc/reader_impl_chunking.hpp | 3 + cpp/tests/io/orc_test.cpp | 2033 +---------------------- 4 files changed, 109 insertions(+), 2017 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index bbec21c4910..4bb86091fb0 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -16,6 +16,12 @@ // #define PRINT_DEBUG +// TODO: remove +#include +// +// +// + #include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/reader_impl.hpp" @@ -676,6 +682,12 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ void reader::impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } + + // auto const stripe_chunk = + // _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++]; + // auto const stripe_start = stripe_chunk.start_idx; + // auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + auto const rows_to_skip = _file_itm_data.rows_to_skip; auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; @@ -982,11 +994,19 @@ void reader::impl::prepare_data(uint64_t skip_rows, if (_selected_columns.num_levels() == 0) { return; } global_preprocess(skip_rows, num_rows_opt, stripes); + + // TODO: only load data if there is no loaded stripe ready to decode. // load_data(); while (_chunk_read_data.more_stripe_to_load()) { load_data(); + printf("done load data\n\n"); } + + // decompress_and_decode(); + // while (_chunk_read_data.more_stripe_to_decode()) { decompress_and_decode(); + _file_itm_data.out_buffers.push_back(std::move(_out_buffers)); + // } } table_with_metadata reader::impl::make_output_chunk() @@ -1017,19 +1037,54 @@ table_with_metadata reader::impl::make_output_chunk() // TODO: move this into decompress_and_decode // Create columns from buffer with respective schema information. 
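+  // In outline, the change below assembles one table per decoded chunk and then
+  // stitches them together (a hedged sketch, not the exact code; `chunk_tables`
+  // is a hypothetical stand-in for the `tabs` vector used here):
+  //
+  //   std::vector<cudf::table_view> views;
+  //   for (auto& t : chunk_tables) { views.push_back(t->view()); }
+  //   auto result = views.size() > 1 ? cudf::concatenate(views)
+  //                                  : std::move(chunk_tables.front());
+  //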
- std::transform( - _selected_columns.levels[0].begin(), - _selected_columns.levels[0].end(), - std::back_inserter(out_columns), - [&](auto const& orc_col_meta) { - out_metadata.schema_info.emplace_back(""); - auto col_buffer = assemble_buffer( - orc_col_meta.id, 0, *_col_meta, _metadata, _selected_columns, _out_buffers, _stream, _mr); - return make_column(col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); - }); + + // TODO: remove + std::vector> tabs; + std::vector tv; + + for (auto& buffers : _file_itm_data.out_buffers) { + // + out_columns.clear(); // TODO: remove + + std::transform(_selected_columns.levels[0].begin(), + _selected_columns.levels[0].end(), + std::back_inserter(out_columns), + [&](auto const& orc_col_meta) { + out_metadata.schema_info.emplace_back(""); + auto col_buffer = assemble_buffer(orc_col_meta.id, + 0, + *_col_meta, + _metadata, + _selected_columns, + buffers, /*_out_buffers*/ + _stream, + _mr); + return make_column( + col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); + }); + + auto tbl = std::make_unique
(std::move(out_columns)); + tabs.push_back(std::move(tbl)); + tv.push_back(tabs.back()->view()); + + // + printf(" ----- decode one chunk\n"); + fflush(stdout); + // + // + // + // + } + printf(" ----- decode total %d chunks\n", (int)tv.size()); + fflush(stdout); // todo: remove this - auto out_table = std::make_unique
(std::move(out_columns)); + // auto out_table = std::make_unique
(std::move(out_columns)); + auto out_table = [&] { + if (tv.size() > 1) { return cudf::concatenate(tv); } + return std::move(tabs.front()); + }(); + // auto out_table = std::move(tabs.front()); #if 0 auto out_table = [&] { diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 3c354bd0f7a..3acd13964e7 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -466,6 +466,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); + printf("total stripe sizes:\n"); for (auto& size : total_stripe_sizes) { printf("size: %ld, %zu\n", size.count, size.size_bytes); } @@ -479,7 +480,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.load_stripe_chunks; - printf("------------\nSplits (/%d): \n", (int)num_stripes); + printf("------------\nSplits (/total num stripe = %d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); } @@ -516,6 +517,8 @@ void reader::impl::load_data() auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + printf("loading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); + // Prepare the buffer to read raw data onto. // TODO: clear all old buffer. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { @@ -679,8 +682,7 @@ void reader::impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); // DEBUG only - // _chunk_read_data.data_read_limit = - // stripe_decompression_sizes.back().size_bytes / 3; + _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; _chunk_read_data.decode_stripe_chunks = find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); @@ -694,7 +696,7 @@ void reader::impl::load_data() #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.decode_stripe_chunks; - printf("------------\nSplits second level (/%d): \n", (int)stripe_chunk.count); + printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_chunk.count); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); } @@ -715,6 +717,9 @@ void reader::impl::load_data() // lvl_stripe_data.clear(); // _file_itm_data.compinfo_ready = true; + + // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. + _chunk_read_data.curr_decode_stripe_chunk = 0; } } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 4ab5b6bd427..becb9a1d0d5 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -122,6 +122,9 @@ struct range { * @brief Struct to store file-level data that remains constant for all chunks being output. 
*/ struct file_intermediate_data { + // TODO: remove + std::vector>> out_buffers; + int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index b972311d988..36ef05ecc36 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -95,1305 +95,32 @@ std::unique_ptr create_random_fixed_table(cudf::size_type num_colum return std::make_unique(std::move(columns)); } -// Base test fixture for tests -struct OrcWriterTest : public cudf::test::BaseFixture {}; - -// Typed test fixture for numeric type tests -template -struct OrcWriterNumericTypeTest : public OrcWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Typed test fixture for timestamp type tests -template -struct OrcWriterTimestampTypeTest : public OrcWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Declare typed test cases -// TODO: Replace with `NumericTypes` when unsigned support is added. Issue #5351 -using SupportedTypes = cudf::test::Types; -TYPED_TEST_SUITE(OrcWriterNumericTypeTest, SupportedTypes); -using SupportedTimestampTypes = - cudf::test::RemoveIf>, - cudf::test::TimestampTypes>; -TYPED_TEST_SUITE(OrcWriterTimestampTypeTest, SupportedTimestampTypes); - -// Base test fixture for chunked writer tests -struct OrcChunkedWriterTest : public cudf::test::BaseFixture {}; - -// Typed test fixture for numeric type tests -template -struct OrcChunkedWriterNumericTypeTest : public OrcChunkedWriterTest { - auto type() { return cudf::data_type{cudf::type_to_id()}; } -}; - -// Declare typed test cases -TYPED_TEST_SUITE(OrcChunkedWriterNumericTypeTest, SupportedTypes); - -// Test fixture for reader tests -struct OrcReaderTest : public cudf::test::BaseFixture {}; - -// Test fixture for statistics tests -struct OrcStatisticsTest : public cudf::test::BaseFixture {}; - -// Test fixture for metadata tests -struct OrcMetadataReaderTest : public cudf::test::BaseFixture {}; - -struct OrcCompressionTest : public cudf::test::BaseFixture, - public ::testing::WithParamInterface {}; - -namespace { -// Generates a vector of uniform random values of type T -template -inline auto random_values(size_t size) -{ - std::vector values(size); - - using T1 = T; - using uniform_distribution = - typename std::conditional_t, - std::bernoulli_distribution, - std::conditional_t, - std::uniform_real_distribution, - std::uniform_int_distribution>>; - - static constexpr auto seed = 0xf00d; - static std::mt19937 engine{seed}; - static uniform_distribution dist{}; - std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); - - return values; -} - -struct SkipRowTest { - int test_calls{0}; - SkipRowTest() {} - - std::unique_ptr
get_expected_result(std::string const& filepath, - int skip_rows, - int file_num_rows, - int read_num_rows) - { - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); - column_wrapper input_col( - sequence, sequence + file_num_rows); - table_view input_table({input_col}); - - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input_table); - cudf::io::write_orc(out_opts); - - auto begin_sequence = sequence, end_sequence = sequence; - if (skip_rows < file_num_rows) { - begin_sequence += skip_rows; - end_sequence += std::min(skip_rows + read_num_rows, file_num_rows); - } - column_wrapper output_col(begin_sequence, - end_sequence); - std::vector> output_cols; - output_cols.push_back(output_col.release()); - return std::make_unique
(std::move(output_cols)); - } - - void test(int skip_rows, int file_num_rows, int read_num_rows) - { - auto filepath = - temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); - auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, read_num_rows); - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .skip_rows(skip_rows) - .num_rows(read_num_rows); - auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); - } - - void test(int skip_rows, int file_num_rows) - { - auto filepath = - temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); - auto expected_result = - get_expected_result(filepath, skip_rows, file_num_rows, file_num_rows - skip_rows); - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .skip_rows(skip_rows); - auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); - } -}; - -} // namespace - -TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) -{ - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); - - constexpr auto num_rows = 10000000; - column_wrapper col(sequence, - sequence + num_rows); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::SNAPPY); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) -{ - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcSingleColumnWithNulls.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) -{ - auto sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (std::rand() / 10); }); - - constexpr auto num_rows = 100; - column_wrapper col(sequence, - sequence + num_rows); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcTimestamps.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - 
.timestamp_type(this->type()); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) -{ - auto sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (std::rand() / 10); }); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 60); }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcTimestampsWithNulls.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .timestamp_type(this->type()); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) -{ - constexpr int64_t max = std::numeric_limits::max(); - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); - - constexpr auto num_rows = 100; - column_wrapper col(sequence, - sequence + num_rows); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .timestamp_type(this->type()); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(OrcWriterTest, MultiColumn) -{ - constexpr auto num_rows = 10; - - auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col6_vals[i], numeric::scale_type{12}}; - }); - auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col6_vals[i], numeric::scale_type{-12}}; - }); - - bool_col col0(col0_data.begin(), col0_data.end()); - int8_col col1(col1_data.begin(), col1_data.end()); - int16_col col2(col2_data.begin(), col2_data.end()); - int32_col col3(col3_data.begin(), col3_data.end()); - float32_col col4(col4_data.begin(), col4_data.end()); - float64_col col5(col5_data.begin(), col5_data.end()); - dec128_col col6(col6_data, col6_data + num_rows); - dec128_col col7(col7_data, col7_data + num_rows); - - list_col col8{ - {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}; - - int32_col child_col{48, 27, 25, 31, 351, 351, 29, 15, -1, -99}; - struct_col col9{child_col}; - - table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("bools"); - 
expected_metadata.column_metadata[1].set_name("int8s"); - expected_metadata.column_metadata[2].set_name("int16s"); - expected_metadata.column_metadata[3].set_name("int32s"); - expected_metadata.column_metadata[4].set_name("floats"); - expected_metadata.column_metadata[5].set_name("doubles"); - expected_metadata.column_metadata[6].set_name("decimal_pos_scale"); - expected_metadata.column_metadata[7].set_name("decimal_neg_scale"); - expected_metadata.column_metadata[8].set_name("lists"); - expected_metadata.column_metadata[9].set_name("structs"); - - auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcWriterTest, MultiColumnWithNulls) -{ - constexpr auto num_rows = 10; - - auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{col6_vals[i], numeric::scale_type{2}}; - }); - auto col0_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); - auto col1_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 2); }); - auto col3_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); - auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 4 && i <= 6); }); - auto col5_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 8); }); - auto col6_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 3); }); - - bool_col col0{col0_data.begin(), col0_data.end(), col0_mask}; - int8_col col1{col1_data.begin(), col1_data.end(), col1_mask}; - int16_col col2(col2_data.begin(), col2_data.end()); - int32_col col3{col3_data.begin(), col3_data.end(), col3_mask}; - float32_col col4{col4_data.begin(), col4_data.end(), col4_mask}; - float64_col col5{col5_data.begin(), col5_data.end(), col5_mask}; - dec64_col col6{col6_data, col6_data + num_rows, col6_mask}; - list_col col7{ - {{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}, - col0_mask}; - auto ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351, 29, 15, -1, -99}, {1, 0, 1, 1, 0, 1, 1, 1, 0, 1}}; - struct_col col8{{ages_col}, {0, 1, 1, 0, 1, 1, 0, 1, 1, 0}}; - table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("bools"); - expected_metadata.column_metadata[1].set_name("int8s"); - expected_metadata.column_metadata[2].set_name("int16s"); - expected_metadata.column_metadata[3].set_name("int32s"); - expected_metadata.column_metadata[4].set_name("floats"); - 
expected_metadata.column_metadata[5].set_name("doubles"); - expected_metadata.column_metadata[6].set_name("decimal"); - expected_metadata.column_metadata[7].set_name("lists"); - expected_metadata.column_metadata[8].set_name("structs"); - - auto filepath = temp_env->get_temp_filepath("OrcMultiColumnWithNulls.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcWriterTest, ReadZeroRows) -{ - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); - - constexpr auto num_rows = 10; - column_wrapper col(sequence, - sequence + num_rows); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .num_rows(0); - auto result = cudf::io::read_orc(in_opts); - - EXPECT_EQ(0, result.tbl->num_rows()); - EXPECT_EQ(1, result.tbl->num_columns()); -} - -TEST_F(OrcWriterTest, Strings) -{ - std::vector strings{ - "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - auto const num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - - int32_col col0(seq_col0.begin(), seq_col0.end()); - str_col col1(strings.begin(), strings.end()); - float32_col col2(seq_col2.begin(), seq_col2.end()); - - table_view expected({col0, col1, col2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - - auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcWriterTest, SlicedTable) -{ - // This test checks for writing zero copy, offsetted views into existing cudf tables - - std::vector strings{ - "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - auto const num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto vals_col3 = random_values(num_rows); - auto seq_col3 = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{vals_col3[i], numeric::scale_type{2}}; - }); - - int32_col col0(seq_col0.begin(), seq_col0.end()); - str_col col1(strings.begin(), 
strings.end()); - float32_col col2(seq_col2.begin(), seq_col2.end()); - float32_col col3(seq_col3, seq_col3 + num_rows); - - list_col col4{ - {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; - - int16_col ages_col{{48, 27, 25, 31, 351, 351, 29, 15}, cudf::test::iterators::null_at(5)}; - struct_col col5{{ages_col}, cudf::test::iterators::null_at(4)}; - - table_view expected({col0, col1, col2, col3, col4, col5}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - expected_metadata.column_metadata[3].set_name("col_decimal"); - expected_metadata.column_metadata[4].set_name("lists"); - expected_metadata.column_metadata[5].set_name("structs"); - - auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows)}); - - auto filepath = temp_env->get_temp_filepath("SlicedTable.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcWriterTest, HostBuffer) -{ - constexpr auto num_rows = 100 << 10; - auto const seq_col = random_values(num_rows); - int32_col col(seq_col.begin(), seq_col.end()); - - table_view expected{{col}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - - std::vector out_buffer; - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder( - cudf::io::source_info(out_buffer.data(), out_buffer.size())) - .use_index(false); - auto const result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcWriterTest, negTimestampsNano) -{ - // This is a separate test because ORC format has a bug where writing a timestamp between -1 and 0 - // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behavior and so - // this test has to hardcode test values which are < -1 second. 
- // Details: https://github.com/rapidsai/cudf/pull/5529#issuecomment-648768925 - auto timestamps_ns = - cudf::test::fixed_width_column_wrapper{ - -131968727238000000, - -1530705634500000000, - -1674638741932929000, - }; - cudf::table_view expected({timestamps_ns}); - - auto filepath = temp_env->get_temp_filepath("OrcNegTimestamp.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(OrcWriterTest, Slice) -{ - int32_col col{{1, 2, 3, 4, 5}, cudf::test::iterators::null_at(3)}; - std::vector indices{2, 5}; - std::vector result = cudf::slice(col, indices); - cudf::table_view tbl{result}; - - auto filepath = temp_env->get_temp_filepath("Slice.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto read_table = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); -} - -TEST_F(OrcChunkedWriterTest, SingleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedSingle.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(*table1); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); -} - -TEST_F(OrcChunkedWriterTest, SimpleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(OrcChunkedWriterTest, LargeTables) -{ - srand(31337); - auto table1 = create_random_fixed_table(512, 4096, true); - auto table2 = create_random_fixed_table(512, 8192, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = 
cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(OrcChunkedWriterTest, ManyTables) -{ - srand(31337); - std::vector> tables; - std::vector table_views; - constexpr int num_tables = 96; - for (int idx = 0; idx < num_tables; idx++) { - auto tbl = create_random_fixed_table(16, 64, true); - table_views.push_back(*tbl); - tables.push_back(std::move(tbl)); - } - - auto expected = cudf::concatenate(table_views); - - auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer writer(opts); - std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { - writer.write(tbl); - }); - writer.close(); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(OrcChunkedWriterTest, Metadata) -{ - std::vector strings{ - "Monday", "Tuesday", "THURSDAY", "Wednesday", "Friday", "Sunday", "Saturday"}; - auto const num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - - int32_col col0(seq_col0.begin(), seq_col0.end()); - str_col col1{strings.begin(), strings.end()}; - float32_col col2(seq_col2.begin(), seq_col2.end()); - - table_view expected({col0, col1, col2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - - auto filepath = temp_env->get_temp_filepath("ChunkedMetadata.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}) - .metadata(expected_metadata); - cudf::io::orc_chunked_writer(opts).write(expected).write(expected); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcChunkedWriterTest, Strings) -{ - bool mask1[] = {true, true, false, true, true, true, true}; - std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; - str_col strings1(h_strings1.begin(), h_strings1.end(), mask1); - table_view tbl1({strings1}); - - bool mask2[] = {false, true, true, true, true, true, true}; - std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; - str_col strings2(h_strings2.begin(), h_strings2.end(), mask2); - table_view tbl2({strings2}); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(OrcChunkedWriterTest, MismatchedTypes) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, 
true); - auto table2 = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer writer(opts); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); -} - -TEST_F(OrcChunkedWriterTest, ChunkedWritingAfterClosing) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedWritingAfterClosing.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer writer(opts); - writer.write(*table1); - writer.close(); - EXPECT_THROW(writer.write(*table1), cudf::logic_error); -} - -TEST_F(OrcChunkedWriterTest, MismatchedStructure) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - auto table2 = create_random_fixed_table(3, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer writer(opts); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); -} - -TEST_F(OrcChunkedWriterTest, ReadStripes) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(OrcChunkedWriterTest, ReadStripesError) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedStripesError.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(*table1); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{0, 1}}); - EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); - read_opts.set_stripes({{-1}}); - EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); -} - -TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) -{ - // write out two 31 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 31; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true}; - - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - 
table_view tbl1({c1a_w, c1b_w}); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - table_view tbl2({c2a_w, c2b_w}); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) -{ - // write out two 33 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 33; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true}; - - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - table_view tbl1({c1a_w, c1b_w}); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - table_view tbl2({c2a_w, c2b_w}); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(OrcReaderTest, CombinedSkipRowTest) -{ - SkipRowTest skip_row; - skip_row.test(50, 75); - skip_row.test(2, 100); - skip_row.test(2, 100, 50); - skip_row.test(2, 100, 98); - skip_row.test(2, 100, 99); - skip_row.test(2, 100, 100); - skip_row.test(2, 100, 110); -} - -TEST_F(OrcStatisticsTest, Basic) -{ - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); - auto ts_sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i - 4) * 1000002; }); - auto dec_sequence = - cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return i * 1001; }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - - std::vector strings{ - "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Wednesday", "Tuesday"}; - int num_rows = strings.size(); - - column_wrapper col1( - sequence, sequence + num_rows, validity); - column_wrapper col2( - sequence, sequence + num_rows, validity); - str_col col3{strings.begin(), strings.end()}; - column_wrapper col4( - ts_sequence, ts_sequence + num_rows, 
validity); - column_wrapper col5( - ts_sequence, ts_sequence + num_rows, validity); - bool_col col6({true, true, true, true, true, false, false, false, false}, validity); - - cudf::test::fixed_point_column_wrapper col7( - dec_sequence, dec_sequence + num_rows, numeric::scale_type{-1}); - - table_view expected({col1, col2, col3, col4, col5, col6, col7}); - - auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc"); - - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); - - auto expected_column_names = std::vector{""}; - std::generate_n( - std::back_inserter(expected_column_names), - expected.num_columns(), - [starting_index = 0]() mutable { return "_col" + std::to_string(starting_index++); }); - EXPECT_EQ(stats.column_names, expected_column_names); - - auto validate_statistics = [&](std::vector const& stats) { - ASSERT_EQ(stats.size(), expected.num_columns() + 1); - auto& s0 = stats[0]; - EXPECT_EQ(*s0.number_of_values, 9ul); - EXPECT_TRUE(s0.has_null.has_value()); - EXPECT_FALSE(*s0.has_null); - - auto& s1 = stats[1]; - EXPECT_EQ(*s1.number_of_values, 4ul); - EXPECT_TRUE(*s1.has_null); - auto& ts1 = std::get(s1.type_specific_stats); - EXPECT_EQ(*ts1.minimum, 1); - EXPECT_EQ(*ts1.maximum, 7); - EXPECT_EQ(*ts1.sum, 16); - - auto& s2 = stats[2]; - EXPECT_EQ(*s2.number_of_values, 4ul); - EXPECT_TRUE(*s2.has_null); - auto& ts2 = std::get(s2.type_specific_stats); - EXPECT_EQ(*ts2.minimum, 1.); - EXPECT_EQ(*ts2.maximum, 7.); - EXPECT_EQ(*ts2.sum, 16.); - - auto& s3 = stats[3]; - EXPECT_EQ(*s3.number_of_values, 9ul); - EXPECT_FALSE(*s3.has_null); - auto& ts3 = std::get(s3.type_specific_stats); - EXPECT_EQ(*ts3.minimum, "Friday"); - EXPECT_EQ(*ts3.maximum, "Wednesday"); - EXPECT_EQ(*ts3.sum, 58ul); - - auto& s4 = stats[4]; - EXPECT_EQ(*s4.number_of_values, 4ul); - EXPECT_TRUE(*s4.has_null); - auto& ts4 = std::get(s4.type_specific_stats); - EXPECT_EQ(*ts4.minimum, -4); - EXPECT_EQ(*ts4.maximum, 3); - EXPECT_EQ(*ts4.minimum_utc, -4); - EXPECT_EQ(*ts4.maximum_utc, 3); - EXPECT_EQ(*ts4.minimum_nanos, 999994); - EXPECT_EQ(*ts4.maximum_nanos, 6); - - auto& s5 = stats[5]; - EXPECT_EQ(*s5.number_of_values, 4ul); - EXPECT_TRUE(*s5.has_null); - auto& ts5 = std::get(s5.type_specific_stats); - EXPECT_EQ(*ts5.minimum, -3001); - EXPECT_EQ(*ts5.maximum, 3000); - EXPECT_EQ(*ts5.minimum_utc, -3001); - EXPECT_EQ(*ts5.maximum_utc, 3000); - EXPECT_EQ(*ts5.minimum_nanos, 994000); - EXPECT_EQ(*ts5.maximum_nanos, 6000); - - auto& s6 = stats[6]; - EXPECT_EQ(*s6.number_of_values, 4ul); - EXPECT_TRUE(*s6.has_null); - auto& ts6 = std::get(s6.type_specific_stats); - EXPECT_EQ(ts6.count[0], 2); - - auto& s7 = stats[7]; - EXPECT_EQ(*s7.number_of_values, 9ul); - EXPECT_FALSE(*s7.has_null); - auto& ts7 = std::get(s7.type_specific_stats); - EXPECT_EQ(*ts7.minimum, "0.0"); - EXPECT_EQ(*ts7.maximum, "800.8"); - EXPECT_EQ(*ts7.sum, "3603.6"); - }; - - validate_statistics(stats.file_stats); - // There's only one stripe, so column stats are the same as stripe stats - validate_statistics(stats.stripes_stats[0]); -} - -TEST_F(OrcWriterTest, SlicedValidMask) -{ - std::vector strings; - // Need more than 32 elements to reproduce the issue - for (int i = 0; i < 34; ++i) - strings.emplace_back("a long string to make sure overflow affects the output"); - // An element is null only to enforce the output column to be nullable - str_col 
col{strings.begin(), strings.end(), cudf::test::iterators::null_at(32)}; - - // Bug tested here is easiest to reproduce when column_offset % 32 is 31 - std::vector indices{31, 34}; - auto sliced_col = cudf::slice(static_cast(col), indices); - cudf::table_view tbl{sliced_col}; - - cudf::io::table_input_metadata expected_metadata(tbl); - expected_metadata.column_metadata[0].set_name("col_string"); - - auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(tbl, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -#if 0 -TEST_F(OrcReaderTest, Test1) -{ - std::string filepath1 = - "/home/nghiat/Devel/cudf/1/python/cudf/cudf/tests/data/orc/" - "TestOrcFile.boolean_corruption_PR_6636.orc"; - - std::string filepath2 = - "/home/nghiat/Devel/cudf/1/python/cudf/cudf/tests/data/orc/" - "TestOrcFile.boolean_corruption_PR_6702.orc"; - - { - printf("test1\n"); - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); - auto result = cudf::io::read_orc(read_opts); - for (int i = 0; i < result.tbl->num_columns(); i++) { - auto& col = result.tbl->get_column(i); - cudf::test::print(col); - printf("\n"); - } - } - - { - printf("test2\n"); - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath2}}); - auto result = cudf::io::read_orc(read_opts); - for (int i = 0; i < result.tbl->num_columns(); i++) { - auto& col = result.tbl->get_column(i); - cudf::test::print(col); - printf("\n"); - } - } -} - -#endif -TEST_F(OrcReaderTest, SingleInputs) -{ - srand(31533); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); - cudf::io::orc_writer_options write_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); - cudf::io::write_orc(write_opts); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); -} - -TEST_F(OrcReaderTest, zstdCompressionRegression) -{ - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { - GTEST_SKIP() << "Newer nvCOMP version is required"; - } - - // Test with zstd compressed orc file with high compression ratio. 
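  // The embedded ORC file below is fewer than 200 bytes, yet decodes to 1920800 rows.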
- constexpr uint8_t input_buffer[] = { - 0x4f, 0x52, 0x43, 0x5a, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, 0xa4, 0x34, 0xc7, 0x03, 0x00, 0x74, - 0x00, 0x00, 0x18, 0x41, 0xff, 0xaa, 0x02, 0x00, 0xbb, 0xff, 0x45, 0xc8, 0x01, 0x25, 0x30, 0x04, - 0x65, 0x00, 0x00, 0x10, 0xaa, 0x1f, 0x02, 0x00, 0x01, 0x29, 0x0b, 0xc7, 0x39, 0xb8, 0x02, 0xcb, - 0xaf, 0x38, 0xc0, 0x07, 0x00, 0x00, 0x40, 0x01, 0xc0, 0x05, 0x00, 0x00, 0x46, 0x4d, 0x45, 0x00, - 0x00, 0x0a, 0x06, 0x08, 0x01, 0x10, 0x01, 0x18, 0x30, 0x0a, 0x06, 0x08, 0x02, 0x10, 0x01, 0x18, - 0x06, 0x0a, 0x06, 0x08, 0x03, 0x10, 0x01, 0x18, 0x05, 0x12, 0x02, 0x08, 0x00, 0x12, 0x04, 0x08, - 0x03, 0x10, 0x02, 0x59, 0x00, 0x00, 0x08, 0x03, 0x10, 0x63, 0x1a, 0x0c, 0x08, 0x03, 0x10, 0x00, - 0x18, 0x3b, 0x20, 0x25, 0x28, 0xa0, 0x9e, 0x75, 0x22, 0x10, 0x08, 0x0c, 0x12, 0x01, 0x01, 0x1a, - 0x09, 0x63, 0x64, 0x5f, 0x67, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x22, 0x02, 0x08, 0x07, 0x30, 0xa0, - 0x9e, 0x75, 0x08, 0x2f, 0x10, 0x05, 0x18, 0x80, 0x80, 0x10, 0x22, 0x02, 0x00, 0x0c, 0x28, 0x00, - 0x30, 0x09, 0x82, 0xf4, 0x03, 0x03, 0x4f, 0x52, 0x43, 0x17}; - - auto source = - cudf::io::source_info(reinterpret_cast(input_buffer), sizeof(input_buffer)); - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(source).use_index(false); - - cudf::io::table_with_metadata result; - CUDF_EXPECT_NO_THROW(result = cudf::io::read_orc(in_opts)); - EXPECT_EQ(1920800, result.tbl->num_rows()); -} - -TEST_F(OrcReaderTest, MultipleInputs) -{ - srand(31537); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto const filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); - { - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); - cudf::io::write_orc(out_opts); - } - - auto const filepath2 = temp_env->get_temp_filepath("SimpleTable2.orc"); - { - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath2}, table2->view()); - cudf::io::write_orc(out_opts); - } - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}); - auto result = cudf::io::read_orc(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -struct OrcWriterTestDecimal : public OrcWriterTest, - public ::testing::WithParamInterface> {}; - -TEST_P(OrcWriterTestDecimal, Decimal64) -{ - auto const [num_rows, scale] = GetParam(); - - // Using int16_t because scale causes values to overflow if they already require 32 bits - auto const vals = random_values(num_rows); - auto data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{vals[i], numeric::scale_type{scale}}; - }); - auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 7 == 0; }); - dec64_col col{data, data + num_rows, mask}; - cudf::table_view tbl({static_cast(col)}); - - auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(tbl.column(0), result.tbl->view().column(0)); 
-} - -INSTANTIATE_TEST_CASE_P(OrcWriterTest, - OrcWriterTestDecimal, - ::testing::Combine(::testing::Values(1, 10000, 10001, 34567), - ::testing::Values(-2, 0, 2))); - -TEST_F(OrcWriterTest, Decimal32) -{ - constexpr auto num_rows = 12000; - - // Using int16_t because scale causes values to overflow if they already require 32 bits - auto const vals = random_values(num_rows); - auto data = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { - return numeric::decimal32{vals[i], numeric::scale_type{2}}; - }); - auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 13; }); - dec32_col col{data, data + num_rows, mask}; - cudf::table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("Decimal32.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, result.tbl->view().column(0)); -} - -TEST_F(OrcStatisticsTest, Overflow) +namespace { +// Generates a vector of uniform random values of type T +template +inline auto random_values(size_t size) { - int num_rows = 10; - auto too_large_seq = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i * (std::numeric_limits::max() / 20); }); - auto too_small_seq = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i * (std::numeric_limits::min() / 20); }); - auto not_too_large_seq = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i * (std::numeric_limits::max() / 200); }); - auto not_too_small_seq = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i * (std::numeric_limits::min() / 200); }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - - column_wrapper col1( - too_large_seq, too_large_seq + num_rows, validity); - column_wrapper col2( - too_small_seq, too_small_seq + num_rows, validity); - column_wrapper col3( - not_too_large_seq, not_too_large_seq + num_rows, validity); - column_wrapper col4( - not_too_small_seq, not_too_small_seq + num_rows, validity); - table_view tbl({col1, col2, col3, col4}); - - auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc"); + std::vector values(size); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_orc(out_opts); + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; - auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); - auto check_sum_exist = [&](int idx, bool expected) { - auto const& s = stats.file_stats[idx]; - auto const& ts = std::get(s.type_specific_stats); - EXPECT_EQ(ts.sum.has_value(), expected); - }; - check_sum_exist(1, false); - check_sum_exist(2, false); - check_sum_exist(3, true); - check_sum_exist(4, true); + return values; } +} // namespace -TEST_F(OrcStatisticsTest, HasNull) -{ - // This test can now be implemented with libcudf; 
keeping the pandas version to keep the test - // inputs diversified - // Method to create file: - // >>> import pandas as pd - // >>> df = pd.DataFrame({'a':pd.Series([1, 2, None], dtype="Int64"), 'b':[3, 4, 5]}) - // >>> df.to_orc("temp.orc") - // - // Contents of file: - // >>> import pyarrow.orc as po - // >>> po.ORCFile('temp.orc').read() - // pyarrow.Table - // a: int64 - // b: int64 - // ---- - // a: [[1,2,null]] - // b: [[3,4,5]] - auto nulls_orc = std::array{ - 0x4F, 0x52, 0x43, 0x1D, 0x00, 0x00, 0x0A, 0x0C, 0x0A, 0x04, 0x00, 0x00, 0x00, 0x00, 0x12, 0x04, - 0x08, 0x03, 0x50, 0x00, 0x2C, 0x00, 0x00, 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, - 0x0E, 0x26, 0x21, 0x36, 0x0E, 0x26, 0x01, 0x16, 0x09, 0xB6, 0x00, 0x46, 0x00, 0x2C, 0x00, 0x00, - 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, 0x0E, 0x66, 0x21, 0x36, 0x0E, 0x36, 0x01, - 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x05, 0x00, 0x00, 0xFF, 0xC0, - 0x07, 0x00, 0x00, 0x46, 0x01, 0x24, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x09, 0x00, 0x00, 0x46, 0x02, - 0x68, 0xA0, 0x68, 0x00, 0x00, 0xE3, 0x62, 0xE3, 0x60, 0x13, 0x60, 0x90, 0x10, 0xE4, 0x02, 0xD1, - 0x8C, 0x12, 0x92, 0x60, 0x9A, 0x09, 0x4C, 0x33, 0x00, 0xC5, 0x59, 0xC1, 0x34, 0x23, 0x98, 0x66, - 0x04, 0xD2, 0x6C, 0x60, 0x3E, 0x13, 0x94, 0xCF, 0x24, 0xC1, 0x2E, 0xC4, 0x02, 0x52, 0x07, 0x24, - 0x99, 0x60, 0xA4, 0x14, 0x73, 0x68, 0x88, 0x33, 0x00, 0x46, 0x00, 0x00, 0xE3, 0x52, 0xE2, 0x62, - 0xE1, 0x60, 0x0E, 0x60, 0xE0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, 0x60, - 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0xB4, - 0x00, 0x00, 0xE3, 0x60, 0x16, 0x98, 0xC6, 0x28, 0xC5, 0xC5, 0xC1, 0x2C, 0xE0, 0x2C, 0x21, 0xA3, - 0x60, 0xAE, 0xC1, 0xAC, 0x24, 0xC4, 0xC1, 0x23, 0xC4, 0xC4, 0xC8, 0x24, 0xC5, 0x98, 0x28, 0xC5, - 0x98, 0xA4, 0xC0, 0xA0, 0xC1, 0x60, 0xC0, 0xA0, 0xC4, 0xC1, 0xC1, 0x82, 0xCE, 0x32, 0x60, 0xB6, - 0x62, 0xE1, 0x60, 0x0E, 0x60, 0xB0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, - 0x60, 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x87, - 0x09, 0x7E, 0x1E, 0x8C, 0x49, 0xAC, 0x86, 0x7A, 0xE6, 0x7A, 0xA6, 0x00, 0x08, 0x5D, 0x10, 0x01, - 0x18, 0x80, 0x80, 0x04, 0x22, 0x02, 0x00, 0x0C, 0x28, 0x26, 0x30, 0x06, 0x82, 0xF4, 0x03, 0x03, - 0x4F, 0x52, 0x43, 0x17, - }; - - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); - - EXPECT_EQ(stats.file_stats[1].has_null, true); - EXPECT_EQ(stats.file_stats[2].has_null, false); - - EXPECT_EQ(stats.stripes_stats[0][1].has_null, true); - EXPECT_EQ(stats.stripes_stats[0][2].has_null, false); -} +// Base test fixture for tests +struct OrcWriterTest : public cudf::test::BaseFixture {}; struct OrcWriterTestStripes : public OrcWriterTest, @@ -1438,716 +165,18 @@ TEST_P(OrcWriterTestStripes, StripeSize) cudf::io::orc_chunked_writer(opts).write(expected->view()); validate(out_buffer_chunked); } - { - std::vector out_buffer; - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected->view()) - .stripe_size_rows(size_rows) - .stripe_size_bytes(size_bytes); - cudf::io::write_orc(out_opts); - validate(out_buffer); - } } INSTANTIATE_TEST_CASE_P(OrcWriterTest, OrcWriterTestStripes, - ::testing::Values(std::make_tuple(800000ul, 1000000), - std::make_tuple(2000000ul, 1000000), - std::make_tuple(4000000ul, 1000000), - std::make_tuple(8000000ul, 1000000), - 
std::make_tuple(8000000ul, 500000), - std::make_tuple(8000000ul, 250000), - std::make_tuple(8000000ul, 100000))); - -TEST_F(OrcWriterTest, StripeSizeInvalid) -{ - auto const unused_table = std::make_unique
(); - std::vector out_buffer; - - EXPECT_THROW( - cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) - .stripe_size_rows(511), - cudf::logic_error); - EXPECT_THROW( - cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) - .stripe_size_bytes(63 << 10), - cudf::logic_error); - EXPECT_THROW( - cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) - .row_index_stride(511), - cudf::logic_error); -} - -TEST_F(OrcWriterTest, TestMap) -{ - auto const num_rows = 1200000; - auto const lists_per_row = 4; - auto const num_child_rows = (num_rows * lists_per_row) / 2; // half due to validity - - auto keys = random_values(num_child_rows); - auto vals = random_values(num_child_rows); - auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); - int32_col keys_col(keys.begin(), keys.end()); - float32_col vals_col{vals.begin(), vals.end(), vals_mask}; - auto s_col = struct_col({keys_col, vals_col}).release(); - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - - std::vector row_offsets(num_rows + 1); - int offset = 0; - for (int idx = 0; idx < (num_rows) + 1; ++idx) { - row_offsets[idx] = offset; - if (valids[idx]) { offset += lists_per_row; } - } - int32_col offsets(row_offsets.begin(), row_offsets.end()); - - auto num_list_rows = static_cast(offsets).size() - 1; - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_list_rows); - auto list_col = cudf::make_lists_column( - num_list_rows, offsets.release(), std::move(s_col), null_count, std::move(null_mask)); - - table_view expected({*list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_list_column_as_map(); - - auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(OrcReaderTest, NestedColumnSelection) -{ - auto const num_rows = 1000; - auto child_col1_data = random_values(num_rows); - auto child_col2_data = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); - int32_col child_col1{child_col1_data.begin(), child_col1_data.end(), validity}; - int64_col child_col2{child_col2_data.begin(), child_col2_data.end(), validity}; - struct_col s_col{child_col1, child_col2}; - table_view expected({s_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("struct_s"); - expected_metadata.column_metadata[0].child(0).set_name("field_a"); - expected_metadata.column_metadata[0].child(1).set_name("field_b"); - - auto filepath = temp_env->get_temp_filepath("OrcNestedSelection.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = 
- cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - .columns({"struct_s.field_b"}); - auto result = cudf::io::read_orc(in_opts); - - // Verify that only one child column is included in the output table - ASSERT_EQ(1, result.tbl->view().column(0).num_children()); - // Verify that the first child column is `field_b` - int64_col expected_col{child_col2_data.begin(), child_col2_data.end(), validity}; - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_col, result.tbl->view().column(0).child(0)); - ASSERT_EQ("field_b", result.metadata.schema_info[0].children[0].name); -} - -TEST_F(OrcReaderTest, DecimalOptions) -{ - constexpr auto num_rows = 10; - auto col_vals = random_values(num_rows); - auto col_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col_vals[i], numeric::scale_type{2}}; - }); - auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 == 0; }); - - dec128_col col{col_data, col_data + num_rows, mask}; - table_view expected({col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("dec"); - - auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options valid_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .decimal128_columns({"dec", "fake_name"}); - // Should not throw, even with "fake name" - EXPECT_NO_THROW(cudf::io::read_orc(valid_opts)); -} - -TEST_F(OrcWriterTest, DecimalOptionsNested) -{ - auto const num_rows = 100; - - auto dec_vals = random_values(num_rows); - auto dec1_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{dec_vals[i], numeric::scale_type{2}}; - }); - auto dec2_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{dec_vals[i], numeric::scale_type{2}}; - }); - dec64_col dec1_col(dec1_data, dec1_data + num_rows); - dec128_col dec2_col(dec2_data, dec2_data + num_rows); - auto child_struct_col = cudf::test::structs_column_wrapper{dec1_col, dec2_col}; - - auto int_vals = random_values(num_rows); - int32_col int_col(int_vals.begin(), int_vals.end()); - auto map_struct_col = struct_col({child_struct_col, int_col}).release(); - - std::vector row_offsets(num_rows + 1); - std::iota(row_offsets.begin(), row_offsets.end(), 0); - int32_col offsets(row_offsets.begin(), row_offsets.end()); - - auto map_list_col = cudf::make_lists_column( - num_rows, offsets.release(), std::move(map_struct_col), 0, rmm::device_buffer{}); - - table_view expected({*map_list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("maps"); - expected_metadata.column_metadata[0].set_list_column_as_map(); - expected_metadata.column_metadata[0].child(1).child(0).child(0).set_name("dec64"); - expected_metadata.column_metadata[0].child(1).child(0).child(1).set_name("dec128"); - - auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - 
cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) - .use_index(false) - // One less level of nesting because children of map columns are the child struct's children - .decimal128_columns({"maps.0.dec64"}); - auto result = cudf::io::read_orc(in_opts); - - // Both columns should be read as decimal128 - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result.tbl->view().column(0).child(1).child(0).child(0), - result.tbl->view().column(0).child(1).child(0).child(1)); -} - -TEST_F(OrcReaderTest, EmptyColumnsParam) -{ - srand(31337); - auto const expected = create_random_fixed_table(2, 4, false); - - std::vector out_buffer; - cudf::io::orc_writer_options args = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); - cudf::io::write_orc(args); - - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}) - .columns({}); - auto const result = cudf::io::read_orc(read_opts); - - EXPECT_EQ(result.tbl->num_columns(), 0); - EXPECT_EQ(result.tbl->num_rows(), 0); -} - -TEST_F(OrcMetadataReaderTest, TestBasic) -{ - auto const num_rows = 1'200'000; - - auto ints = random_values(num_rows); - auto floats = random_values(num_rows); - int32_col int_col(ints.begin(), ints.end()); - float32_col float_col(floats.begin(), floats.end()); - - table_view expected({int_col, float_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("int_col"); - expected_metadata.column_metadata[1].set_name("float_col"); - - auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_orc(out_opts); - - auto meta = read_orc_metadata(cudf::io::source_info{filepath}); - EXPECT_EQ(meta.num_rows(), num_rows); - - EXPECT_EQ(meta.schema().root().name(), ""); - EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT); - ASSERT_EQ(meta.schema().root().num_children(), 2); - - EXPECT_EQ(meta.schema().root().child(0).name(), "int_col"); - EXPECT_EQ(meta.schema().root().child(1).name(), "float_col"); -} - -TEST_F(OrcMetadataReaderTest, TestNested) -{ - auto const num_rows = 1'200'000; - auto const lists_per_row = 4; - auto const num_child_rows = num_rows * lists_per_row; - - auto keys = random_values(num_child_rows); - auto vals = random_values(num_child_rows); - int32_col keys_col(keys.begin(), keys.end()); - float32_col vals_col(vals.begin(), vals.end()); - auto s_col = struct_col({keys_col, vals_col}).release(); - - std::vector row_offsets(num_rows + 1); - for (int idx = 0; idx < num_rows + 1; ++idx) { - row_offsets[idx] = idx * lists_per_row; - } - int32_col offsets(row_offsets.begin(), row_offsets.end()); - - auto list_col = - cudf::make_lists_column(num_rows, offsets.release(), std::move(s_col), 0, rmm::device_buffer{}); - - table_view expected({*list_col, *list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("maps"); - expected_metadata.column_metadata[0].set_list_column_as_map(); - expected_metadata.column_metadata[1].set_name("lists"); - expected_metadata.column_metadata[1].child(1).child(0).set_name("int_field"); - expected_metadata.column_metadata[1].child(1).child(1).set_name("float_field"); - - auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); - 
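  // Write the nested columns, then verify the MAP/LIST/STRUCT schema via read_orc_metadata below.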
cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - cudf::io::write_orc(out_opts); - - auto meta = read_orc_metadata(cudf::io::source_info{filepath}); - EXPECT_EQ(meta.num_rows(), num_rows); - - EXPECT_EQ(meta.schema().root().name(), ""); - EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT); - ASSERT_EQ(meta.schema().root().num_children(), 2); - - auto const& out_map_col = meta.schema().root().child(0); - EXPECT_EQ(out_map_col.name(), "maps"); - EXPECT_EQ(out_map_col.type_kind(), cudf::io::orc::MAP); - ASSERT_EQ(out_map_col.num_children(), 2); - EXPECT_EQ(out_map_col.child(0).name(), ""); // keys (no name in ORC) - EXPECT_EQ(out_map_col.child(1).name(), ""); // values (no name in ORC) - - auto const& out_list_col = meta.schema().root().child(1); - EXPECT_EQ(out_list_col.name(), "lists"); - EXPECT_EQ(out_list_col.type_kind(), cudf::io::orc::LIST); - ASSERT_EQ(out_list_col.num_children(), 1); - - auto const& out_list_struct_col = out_list_col.child(0); - EXPECT_EQ(out_list_struct_col.name(), ""); // elements (no name in ORC) - EXPECT_EQ(out_list_struct_col.type_kind(), cudf::io::orc::STRUCT); - ASSERT_EQ(out_list_struct_col.num_children(), 2); - - auto const& out_int_col = out_list_struct_col.child(0); - EXPECT_EQ(out_int_col.name(), "int_field"); - EXPECT_EQ(out_int_col.type_kind(), cudf::io::orc::INT); - - auto const& out_float_col = out_list_struct_col.child(1); - EXPECT_EQ(out_float_col.name(), "float_field"); - EXPECT_EQ(out_float_col.type_kind(), cudf::io::orc::FLOAT); -} - -TEST_F(OrcReaderTest, ZstdMaxCompressionRate) -{ - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD) or - cudf::io::nvcomp::is_compression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { - GTEST_SKIP() << "Newer nvCOMP version is required"; - } - - // Encodes as 64KB of zeros, which compresses to 18 bytes with ZSTD - std::vector const h_data(8 * 1024); - float32_col col(h_data.begin(), h_data.end()); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcHugeCompRatio.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::ZSTD); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(OrcWriterTest, CompStats) -{ - auto table = create_random_fixed_table(1, 100000, true); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::orc_writer_options opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}, table->view()) - .compression_statistics(stats); - cudf::io::write_orc(opts); - - EXPECT_NE(stats->num_compressed_bytes(), 0); - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_FALSE(std::isnan(stats->compression_ratio())); -} - -TEST_F(OrcChunkedWriterTest, CompStats) -{ - auto table = create_random_fixed_table(1, 100000, true); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}) - .compression_statistics(stats); - 
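  // Writer stats accumulate across chunked writes; the second write further below is
  // expected to exactly double num_compressed_bytes.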
cudf::io::orc_chunked_writer(opts).write(*table); - - EXPECT_NE(stats->num_compressed_bytes(), 0); - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_FALSE(std::isnan(stats->compression_ratio())); - - auto const single_table_comp_stats = *stats; - cudf::io::orc_chunked_writer(opts).write(*table); - - EXPECT_EQ(stats->compression_ratio(), single_table_comp_stats.compression_ratio()); - EXPECT_EQ(stats->num_compressed_bytes(), 2 * single_table_comp_stats.num_compressed_bytes()); - - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); -} - -void expect_compression_stats_empty(std::shared_ptr stats) -{ - EXPECT_EQ(stats->num_compressed_bytes(), 0); - EXPECT_EQ(stats->num_failed_bytes(), 0); - EXPECT_EQ(stats->num_skipped_bytes(), 0); - EXPECT_TRUE(std::isnan(stats->compression_ratio())); -} - -TEST_F(OrcWriterTest, CompStatsEmptyTable) -{ - auto table_no_rows = create_random_fixed_table(20, 0, false); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder( - cudf::io::sink_info{&unused_buffer}, table_no_rows->view()) - .compression_statistics(stats); - cudf::io::write_orc(opts); - - expect_compression_stats_empty(stats); -} - -TEST_F(OrcChunkedWriterTest, CompStatsEmptyTable) -{ - auto table_no_rows = create_random_fixed_table(20, 0, false); - - auto const stats = std::make_shared(); - - std::vector unused_buffer; - cudf::io::chunked_orc_writer_options opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}) - .compression_statistics(stats); - cudf::io::orc_chunked_writer(opts).write(*table_no_rows); - - expect_compression_stats_empty(stats); -} - -TEST_F(OrcWriterTest, EmptyRowGroup) -{ - std::vector ints(10000 + 5, -1); - auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i >= 10000; }); - int32_col col{ints.begin(), ints.end(), mask}; - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcEmptyRowGroup.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(OrcWriterTest, NoNullsAsNonNullable) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - column_wrapper col{{1, 2, 3}, valids}; - table_view expected({col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_nullability(false); - - auto filepath = temp_env->get_temp_filepath("NonNullable.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(std::move(expected_metadata)); - // Writer should be able to write a column without nulls as non-nullable - EXPECT_NO_THROW(cudf::io::write_orc(out_opts)); -} - -TEST_F(OrcWriterTest, SlicedStringColumn) -{ - std::vector strings{"a", "bc", "def", "longer", "strings", "at the end"}; - str_col col(strings.begin(), strings.end()); - table_view expected({col}); - - // Slice the table to include the longer strings - auto expected_slice = cudf::slice(expected, {2, 6}); - - auto filepath = 
temp_env->get_temp_filepath("SlicedTable.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); -} - -TEST_F(OrcWriterTest, EmptyChildStringColumn) -{ - list_col col{{}, {}}; - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("OrcEmptyChildStringColumn.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_orc(out_opts); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -template -void check_all_null_stats(cudf::io::column_statistics const& stats) -{ - EXPECT_EQ(stats.number_of_values, 0); - EXPECT_TRUE(stats.has_null); - - auto const ts = std::get(stats.type_specific_stats); - EXPECT_FALSE(ts.minimum.has_value()); - EXPECT_FALSE(ts.maximum.has_value()); - EXPECT_TRUE(ts.sum.has_value()); - EXPECT_EQ(*ts.sum, 0); -} - -TEST_F(OrcStatisticsTest, AllNulls) -{ - float64_col double_col({0., 0., 0.}, cudf::test::iterators::all_nulls()); - int32_col int_col({0, 0, 0}, cudf::test::iterators::all_nulls()); - str_col string_col({"", "", ""}, cudf::test::iterators::all_nulls()); - - cudf::table_view expected({int_col, double_col, string_col}); - - std::vector out_buffer; - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected); - cudf::io::write_orc(out_opts); - - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}); - - check_all_null_stats(stats.file_stats[1]); - check_all_null_stats(stats.file_stats[2]); - check_all_null_stats(stats.file_stats[3]); -} - -TEST_F(OrcWriterTest, UnorderedDictionary) -{ - std::vector strings{ - "BBBB", "BBBB", "CCCC", "BBBB", "CCCC", "EEEE", "CCCC", "AAAA", "DDDD", "EEEE"}; - str_col col(strings.begin(), strings.end()); - - table_view expected({col}); - - std::vector out_buffer_sorted; - cudf::io::orc_writer_options out_opts_sorted = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer_sorted}, expected); - cudf::io::write_orc(out_opts_sorted); - - cudf::io::orc_reader_options in_opts_sorted = cudf::io::orc_reader_options::builder( - cudf::io::source_info{out_buffer_sorted.data(), out_buffer_sorted.size()}); - auto const from_sorted = cudf::io::read_orc(in_opts_sorted).tbl; - - std::vector out_buffer_unsorted; - cudf::io::orc_writer_options out_opts_unsorted = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer_unsorted}, expected) - .enable_dictionary_sort(false); - cudf::io::write_orc(out_opts_unsorted); - - cudf::io::orc_reader_options in_opts_unsorted = cudf::io::orc_reader_options::builder( - cudf::io::source_info{out_buffer_unsorted.data(), out_buffer_unsorted.size()}); - auto const from_unsorted = cudf::io::read_orc(in_opts_unsorted).tbl; - - CUDF_TEST_EXPECT_TABLES_EQUAL(*from_sorted, *from_unsorted); -} - -TEST_F(OrcStatisticsTest, Empty) -{ - int32_col col0{}; - float64_col col1{}; - str_col col2{}; - dec64_col col3{}; - 
column_wrapper col4; - bool_col col5{}; - table_view expected({col0, col1, col2, col3, col4, col5}); - - std::vector out_buffer; - - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected); - cudf::io::write_orc(out_opts); - - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}); - - auto expected_column_names = std::vector{""}; - std::generate_n( - std::back_inserter(expected_column_names), - expected.num_columns(), - [starting_index = 0]() mutable { return "_col" + std::to_string(starting_index++); }); - EXPECT_EQ(stats.column_names, expected_column_names); - - EXPECT_EQ(stats.column_names.size(), 7); - EXPECT_EQ(stats.stripes_stats.size(), 0); - - auto const& fstats = stats.file_stats; - ASSERT_EQ(fstats.size(), 7); - auto& s0 = fstats[0]; - EXPECT_TRUE(s0.number_of_values.has_value()); - EXPECT_EQ(*s0.number_of_values, 0ul); - EXPECT_TRUE(s0.has_null.has_value()); - EXPECT_FALSE(*s0.has_null); - - auto& s1 = fstats[1]; - EXPECT_EQ(*s1.number_of_values, 0ul); - EXPECT_FALSE(*s1.has_null); - auto& ts1 = std::get(s1.type_specific_stats); - EXPECT_FALSE(ts1.minimum.has_value()); - EXPECT_FALSE(ts1.maximum.has_value()); - EXPECT_TRUE(ts1.sum.has_value()); - EXPECT_EQ(*ts1.sum, 0); - - auto& s2 = fstats[2]; - EXPECT_EQ(*s2.number_of_values, 0ul); - EXPECT_FALSE(*s2.has_null); - auto& ts2 = std::get(s2.type_specific_stats); - EXPECT_FALSE(ts2.minimum.has_value()); - EXPECT_FALSE(ts2.maximum.has_value()); - EXPECT_TRUE(ts2.sum.has_value()); - EXPECT_EQ(*ts2.sum, 0); - - auto& s3 = fstats[3]; - EXPECT_EQ(*s3.number_of_values, 0ul); - EXPECT_FALSE(*s3.has_null); - auto& ts3 = std::get(s3.type_specific_stats); - EXPECT_FALSE(ts3.minimum.has_value()); - EXPECT_FALSE(ts3.maximum.has_value()); - EXPECT_TRUE(ts3.sum.has_value()); - EXPECT_EQ(*ts3.sum, 0); - - auto& s4 = fstats[4]; - EXPECT_EQ(*s4.number_of_values, 0ul); - EXPECT_FALSE(*s4.has_null); - auto& ts4 = std::get(s4.type_specific_stats); - EXPECT_FALSE(ts4.minimum.has_value()); - EXPECT_FALSE(ts4.maximum.has_value()); - EXPECT_TRUE(ts4.sum.has_value()); - EXPECT_EQ(*ts4.sum, "0"); - - auto& s5 = fstats[5]; - EXPECT_EQ(*s5.number_of_values, 0ul); - EXPECT_FALSE(*s5.has_null); - auto& ts5 = std::get(s5.type_specific_stats); - EXPECT_FALSE(ts5.minimum.has_value()); - EXPECT_FALSE(ts5.maximum.has_value()); - EXPECT_FALSE(ts5.minimum_utc.has_value()); - EXPECT_FALSE(ts5.maximum_utc.has_value()); - EXPECT_FALSE(ts5.minimum_nanos.has_value()); - EXPECT_FALSE(ts5.maximum_nanos.has_value()); - - auto& s6 = fstats[6]; - EXPECT_EQ(*s6.number_of_values, 0ul); - EXPECT_FALSE(*s6.has_null); - auto& ts6 = std::get(s6.type_specific_stats); - EXPECT_EQ(ts6.count[0], 0); -} - -TEST_P(OrcCompressionTest, Basic) -{ - constexpr auto num_rows = 12000; - auto const compression_type = GetParam(); - - // Generate compressible data - auto int_sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); - auto float_sequence = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 32; }); - - int32_col int_col(int_sequence, int_sequence + num_rows); - float32_col float_col(float_sequence, float_sequence + num_rows); - - table_view expected({int_col, float_col}); - - std::vector out_buffer; - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected) - .compression(compression_type); - cudf::io::write_orc(out_opts); - 
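  // Read back and verify a lossless round trip for the parameterized compression type.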
- cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -INSTANTIATE_TEST_CASE_P(OrcCompressionTest, - OrcCompressionTest, - ::testing::Values(cudf::io::compression_type::NONE, - cudf::io::compression_type::SNAPPY, - cudf::io::compression_type::LZ4, - cudf::io::compression_type::ZSTD)); - -TEST_F(OrcWriterTest, BounceBufferBug) -{ - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); - - constexpr auto num_rows = 150000; - column_wrapper col(sequence, - sequence + num_rows); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("BounceBufferBug.orc"); - cudf::io::orc_writer_options out_opts = - cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::ZSTD); - cudf::io::write_orc(out_opts); -} - -CUDF_TEST_PROGRAM_MAIN() + ::testing::Values(std::make_tuple(800000ul, 1000000))); + +// INSTANTIATE_TEST_CASE_P(OrcWriterTest, +// OrcWriterTestStripes, +// ::testing::Values(std::make_tuple(800000ul, 1000000), +// std::make_tuple(2000000ul, 1000000), +// std::make_tuple(4000000ul, 1000000), +// std::make_tuple(8000000ul, 1000000), +// std::make_tuple(8000000ul, 500000), +// std::make_tuple(8000000ul, 250000), +// std::make_tuple(8000000ul, 100000))); From e7a15210df0e55100d9418778acd46e732a5cd49 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 22 Feb 2024 17:23:57 -0800 Subject: [PATCH 091/321] Implementing decode by chunks Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 62 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4bb86091fb0..10cdaf31b48 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -59,6 +59,7 @@ namespace cudf::io::orc::detail { namespace { // TODO: update +// TODO: compute num stripes from chunks /** * @brief Decompresses the stripe data, at stream granularity. 
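 *
 * A sketch (based on the prepare_data() changes later in this series) of where
 * decompression sits in the chunked-read loop:
 * @code
 * while (_chunk_read_data.more_stripe_to_load()) {
 *   load_data();
 *   // each pass decodes one stripe chunk produced by the load step
 *   while (_chunk_read_data.more_stripe_to_decode()) { decompress_and_decode(); }
 * }
 * @endcode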
* @@ -683,10 +684,12 @@ void reader::impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } - // auto const stripe_chunk = - // _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++]; - // auto const stripe_start = stripe_chunk.start_idx; - // auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + auto const stripe_chunk = + _chunk_read_data.decode_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++]; + auto const stripe_start = stripe_chunk.start_idx; + auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + + printf("decoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); auto const rows_to_skip = _file_itm_data.rows_to_skip; auto const rows_to_read = _file_itm_data.rows_to_read; @@ -710,6 +713,8 @@ void reader::impl::decompress_and_decode() auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; auto& lvl_chunks = _file_itm_data.lvl_data_chunks; + null_count_prefix_sums.clear(); + // TODO: move this to global step lvl_chunks.resize(_selected_columns.num_levels()); _out_buffers.resize(_selected_columns.num_levels()); @@ -718,7 +723,8 @@ void reader::impl::decompress_and_decode() // // // TODO: move this to reader_impl.cu, decomp and decode step - std::size_t num_stripes = selected_stripes.size(); + // std::size_t num_stripes = selected_stripes.size(); + std::size_t num_stripes = stripe_chunk.count; // Iterates through levels of nested columns, child column will be one level down // compared to parent column. @@ -794,15 +800,16 @@ void reader::impl::decompress_and_decode() std::size_t num_rowgroups = 0; // TODO: Stripe and stream idx must be by chunk. - std::size_t stripe_idx = 0; + // std::size_t stripe_idx = 0; std::size_t stream_idx = 0; - // std::vector, std::size_t>> read_tasks; - for (auto const& stripe : selected_stripes) { + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + // for (auto const& stripe : selected_stripes) { + auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx, + auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, level, stripe_info, stripe_footer, @@ -830,7 +837,7 @@ void reader::impl::decompress_and_decode() } // Update chunks to reference streams pointers for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx][col_idx]; + auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting chunk.start_row = (level == 0) @@ -877,7 +884,7 @@ void reader::impl::decompress_and_decode() stripe_start_row += num_rows_per_stripe; num_rowgroups += stripe_num_rowgroups; - stripe_idx++; + // stripe_idx++; } // for (stripe : selected_stripes) if (stripe_data.empty()) { continue; } @@ -903,17 +910,19 @@ void reader::impl::decompress_and_decode() } // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = decompress_stripe_data(_file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - stripe_data.clear(); + auto decomp_data = decompress_stripe_data( + 
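        // Intended to restrict decompression to the streams of the stripes in the current chunk: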
_file_itm_data.compinfo_map,
+        *_metadata.per_file_metadata[0].decompressor,
+        stripe_data,
+        host_span(stream_info.data() + stripe_start, stripe_chunk.count),
+        chunks,
+        row_groups,
+        num_stripes,
+        _metadata.get_row_index_stride(),
+        level == 0,
+        _stream);
+      // TODO: fix this
+      // stripe_data.clear();
       stripe_data.push_back(std::move(decomp_data));
     } else {
       if (row_groups.size().first) {
@@ -1000,12 +1009,17 @@ void reader::impl::prepare_data(uint64_t skip_rows,
   while (_chunk_read_data.more_stripe_to_load()) {
     load_data();
     printf("done load data\n\n");
+
+    while (_chunk_read_data.more_stripe_to_decode()) {
+      decompress_and_decode();
+      _file_itm_data.out_buffers.push_back(std::move(_out_buffers));
+    }
   }

   // decompress_and_decode();
   // while (_chunk_read_data.more_stripe_to_decode()) {
-  decompress_and_decode();
-  _file_itm_data.out_buffers.push_back(std::move(_out_buffers));
   // }
 }

From fdad84e7b88df6af0ada5472175443612c358078 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 23 Feb 2024 10:48:04 -0800
Subject: [PATCH 092/321] Only to test

Signed-off-by: Nghia Truong
---
 cpp/include/cudf/io/orc.hpp            |  4 +-
 cpp/src/io/orc/reader_impl.cu          | 57 +++++++++++++++++++-------
 cpp/src/io/orc/reader_impl_chunking.cu |  6 ++-
 cpp/tests/io/orc_test.cpp              | 25 ++++++-----
 4 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp
index d512f4a6cc4..61f4681a3f4 100644
--- a/cpp/include/cudf/io/orc.hpp
+++ b/cpp/include/cudf/io/orc.hpp
@@ -1111,7 +1111,7 @@ class chunked_orc_writer_options {
    */
   void set_stripe_size_bytes(size_t size_bytes)
   {
-    CUDF_EXPECTS(size_bytes >= 64 << 10, "64KB is the minimum stripe size");
+    // CUDF_EXPECTS(size_bytes >= 64 << 10, "64KB is the minimum stripe size");
     _stripe_size_bytes = size_bytes;
   }

@@ -1127,7 +1127,7 @@ class chunked_orc_writer_options {
    */
   void set_stripe_size_rows(size_type size_rows)
   {
-    CUDF_EXPECTS(size_rows >= 512, "maximum stripe size cannot be smaller than 512");
+    // CUDF_EXPECTS(size_rows >= 512, "maximum stripe size cannot be smaller than 512");
     _stripe_size_rows = size_rows;
   }

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 10cdaf31b48..ab57dce9680 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -17,6 +17,8 @@
 // #define PRINT_DEBUG
 // TODO: remove
+#include 
+
 #include 
 //
 //
@@ -463,6 +465,8 @@ void decode_stream_data(std::size_t num_dicts,
 {
   auto const num_stripes = chunks.size().first;
   auto const num_columns = chunks.size().second;
+  printf("decode %d stripes\n", (int)num_stripes);
+
   thrust::counting_iterator col_idx_it(0);
   thrust::counting_iterator stripe_idx_it(0);

@@ -483,6 +487,7 @@ void decode_stream_data(std::size_t num_dicts,
     chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream);

   if (level > 0) {
+    printf("update_null_mask\n");
     // Update nullmasks for children if parent was a struct and had null mask
     update_null_mask(chunks, out_buffers, stream, mr);
   }
@@ -508,13 +513,15 @@ void decode_stream_data(std::size_t num_dicts,
   CUDF_EXPECTS(num_errors == 0, "ORC data decode failed");

   std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) {
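    // Sum this column's null count across all stripes decoded in this pass.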
out_buffers[col_idx].null_count() = std::accumulate( + stripe_idx_it + 0, + stripe_idx_it + num_stripes, + 0, + [&](auto null_count, auto const stripe_idx) { + printf( + "null count: %d => %d\n", (int)stripe_idx, (int)chunks[stripe_idx][col_idx].null_count); + return null_count + chunks[stripe_idx][col_idx].null_count; + }); }); } @@ -689,12 +696,18 @@ void reader::impl::decompress_and_decode() auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; - printf("decoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); + printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); - auto const rows_to_skip = _file_itm_data.rows_to_skip; - auto const rows_to_read = _file_itm_data.rows_to_read; + // auto const rows_to_skip = _file_itm_data.rows_to_skip; + // auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; + auto const rows_to_skip = 0; + auto rows_to_read = 0; + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + rows_to_read += _metadata.per_file_metadata[0].ff.stripes[stripe_idx].numberOfRows; + } + // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { auto const has_timestamp_column = std::any_of( @@ -780,6 +793,8 @@ void reader::impl::decompress_and_decode() // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); + printf(" use_index: %d\n", (int)use_index); + // Logically view streams as columns auto const& stream_info = _file_itm_data.lvl_stream_info[level]; @@ -805,6 +820,8 @@ void reader::impl::decompress_and_decode() for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { // for (auto const& stripe : selected_stripes) { + + printf("processing stripe_idx = %d\n", (int)stripe_idx); auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; @@ -823,14 +840,18 @@ void reader::impl::decompress_and_decode() &chunks); auto const is_stripe_data_empty = total_data_size == 0; + printf("is_stripe_data_empty: %d\n", (int)is_stripe_data_empty); + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); auto dst_base = static_cast(stripe_data[stripe_idx].data()); auto const num_rows_per_stripe = stripe_info->numberOfRows; - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; + printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); + + auto const rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; if (use_index) { stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / _metadata.get_row_index_stride(); @@ -877,7 +898,7 @@ void reader::impl::decompress_and_decode() } if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stripe_start].dst_pos; } } } @@ -968,6 +989,8 @@ void reader::impl::decompress_and_decode() _mr); if (nested_cols.size()) { + printf("have nested col\n"); + // Extract information to process nested child columns scan_null_counts(chunks, null_count_prefix_sums[level], _stream); @@ -1031,6 +1054,7 @@ table_with_metadata reader::impl::make_output_chunk() std::vector> out_columns; auto out_metadata = 
make_output_metadata(); +#if 0 // If no rows or stripes to read, return empty columns if (_file_itm_data.has_no_data() || !_chunk_read_data.has_next()) { std::transform(_selected_columns.levels[0].begin(), @@ -1048,6 +1072,7 @@ table_with_metadata reader::impl::make_output_chunk() }); return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } +#endif // TODO: move this into decompress_and_decode // Create columns from buffer with respective schema information. @@ -1059,6 +1084,7 @@ table_with_metadata reader::impl::make_output_chunk() for (auto& buffers : _file_itm_data.out_buffers) { // out_columns.clear(); // TODO: remove + out_metadata = make_output_metadata(); std::transform(_selected_columns.levels[0].begin(), _selected_columns.levels[0].end(), @@ -1077,12 +1103,15 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); + printf("output col: \n"); + cudf::test::print(out_columns.front()->view()); + auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); tv.push_back(tabs.back()->view()); // - printf(" ----- decode one chunk\n"); + printf(" ----- decode one chunk, size = %d\n", tv.back().num_rows()); fflush(stdout); // // diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 3acd13964e7..aed03245f57 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -87,7 +87,10 @@ std::size_t gather_stream_info_and_column_desc( for (auto const& stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { // Ignore reading this stream from source. - cudf::logger().warn("Unexpected stream in the input ORC source. The stream will be ignored."); + // cudf::logger().warn("Unexpected stream in the input ORC source. The stream will be + // ignored."); + printf("Unexpected stream in the input ORC source. The stream will be ignored\n"); + fflush(stdout); src_offset += stream.length; continue; } @@ -102,6 +105,7 @@ std::size_t gather_stream_info_and_column_desc( auto const schema_type = types[column_id]; if (!schema_type.subtypes.empty() && schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { + printf("present stream\n"); for (auto const& idx : schema_type.subtypes) { auto const child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; if (child_idx >= 0) { diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 36ef05ecc36..5bddacf635e 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -126,10 +126,10 @@ struct OrcWriterTestStripes : public OrcWriterTest, public ::testing::WithParamInterface> {}; -TEST_P(OrcWriterTestStripes, StripeSize) +TEST_F(OrcWriterTestStripes, StripeSize) { - constexpr auto num_rows = 1000000; - auto const [size_bytes, size_rows] = GetParam(); + constexpr auto num_rows = 50; + // auto const [size_bytes, size_rows] = GetParam(); auto const seq_col = random_values(num_rows); auto const validity = @@ -138,12 +138,15 @@ TEST_P(OrcWriterTestStripes, StripeSize) std::vector> cols; cols.push_back(col.release()); + + printf("input col: \n"); + cudf::test::print(cols.front()->view()); + auto const expected = std::make_unique
(std::move(cols)); auto validate = [&](std::vector const& orc_buffer) { - auto const expected_stripe_num = - std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); - auto const stats = cudf::io::read_parsed_orc_statistics( + auto const expected_stripe_num = 1; + auto const stats = cudf::io::read_parsed_orc_statistics( cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); @@ -160,16 +163,16 @@ TEST_P(OrcWriterTestStripes, StripeSize) std::vector out_buffer_chunked; cudf::io::chunked_orc_writer_options opts = cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked)) - .stripe_size_rows(size_rows) - .stripe_size_bytes(size_bytes); + .stripe_size_rows(1000); cudf::io::orc_chunked_writer(opts).write(expected->view()); + validate(out_buffer_chunked); } } -INSTANTIATE_TEST_CASE_P(OrcWriterTest, - OrcWriterTestStripes, - ::testing::Values(std::make_tuple(800000ul, 1000000))); +// INSTANTIATE_TEST_CASE_P(OrcWriterTest, +// OrcWriterTestStripes, +// ::testing::Values(std::make_tuple(800000ul, 1000000))); // INSTANTIATE_TEST_CASE_P(OrcWriterTest, // OrcWriterTestStripes, From 92844ec76a0ce7122e680629a4f34844b099cc4a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 23 Feb 2024 17:38:08 -0800 Subject: [PATCH 093/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 51 ++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index ab57dce9680..b02471c0880 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -77,6 +77,7 @@ namespace { * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( + chunk const& stripe_chunk, stream_id_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -93,10 +94,26 @@ rmm::device_buffer decompress_stripe_data( std::size_t num_uncompressed_blocks = 0; std::size_t total_decomp_size = 0; - cudf::detail::hostdevice_vector compinfo( - 0, stream_info.size(), stream); + // printf("decompress #stripe: %d, ") + // TODO: use lvl_stripe_stream_chunks + std::size_t count{0}; for (auto const& info : stream_info) { + if (info.id.stripe_idx < stripe_chunk.start_idx || + info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + continue; + } + count++; + } + + cudf::detail::hostdevice_vector compinfo(0, count, stream); + + for (auto const& info : stream_info) { + if (info.id.stripe_idx < stripe_chunk.start_idx || + info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + continue; + } + #ifdef PRINT_DEBUG printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.id.stripe_idx, @@ -931,19 +948,18 @@ void reader::impl::decompress_and_decode() } // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = decompress_stripe_data( - _file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - host_span(stream_info.data() + stripe_start, stripe_chunk.count), - chunks, - row_groups, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - // TODO: fix this - // stripe_data.clear(); + auto decomp_data = decompress_stripe_data(stripe_chunk, + _file_itm_data.compinfo_map, + *_metadata.per_file_metadata[0].decompressor, + stripe_data, + stream_info, + chunks, + row_groups, + 
num_stripes,
+                                              _metadata.get_row_index_stride(),
+                                              level == 0,
+                                              _stream);
+    stripe_data.clear();
     stripe_data.push_back(std::move(decomp_data));
   } else {
     if (row_groups.size().first) {
       chunks.host_to_device_async(_stream);
@@ -1054,9 +1070,9 @@ table_with_metadata reader::impl::make_output_chunk()
   std::vector<std::unique_ptr<column>> out_columns;
   auto out_metadata = make_output_metadata();
 
-#if 0
   // If no rows or stripes to read, return empty columns
-  if (_file_itm_data.has_no_data() || !_chunk_read_data.has_next()) {
+  if (_file_itm_data.has_no_data() /*|| !_chunk_read_data.has_next()*/) {
+    printf("has no next\n");
     std::transform(_selected_columns.levels[0].begin(),
                    _selected_columns.levels[0].end(),
                    std::back_inserter(out_columns),
@@ -1072,7 +1088,6 @@ table_with_metadata reader::impl::make_output_chunk()
                    });
     return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)}; } -#endif // TODO: move this into decompress_and_decode // Create columns from buffer with respective schema information. From 370c00f48451a34926a6a0456e130070797cb8aa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 23 Feb 2024 20:26:31 -0800 Subject: [PATCH 094/321] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 42 ++++++++++++++++++++++++++++++++--- cpp/tests/io/orc_test.cpp | 18 +++++++-------- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index b02471c0880..78f622bf2fe 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -864,6 +864,9 @@ void reader::impl::decompress_and_decode() auto dst_base = static_cast(stripe_data[stripe_idx].data()); + printf("line %d\n", __LINE__); + fflush(stdout); + auto const num_rows_per_stripe = stripe_info->numberOfRows; printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); @@ -873,6 +876,10 @@ void reader::impl::decompress_and_decode() stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / _metadata.get_row_index_stride(); } + + printf("line %d\n", __LINE__); + fflush(stdout); + // Update chunks to reference streams pointers for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; @@ -919,12 +926,19 @@ void reader::impl::decompress_and_decode() } } } + + printf("line %d\n", __LINE__); + fflush(stdout); + stripe_start_row += num_rows_per_stripe; num_rowgroups += stripe_num_rowgroups; // stripe_idx++; } // for (stripe : selected_stripes) + printf("line %d\n", __LINE__); + fflush(stdout); + if (stripe_data.empty()) { continue; } // Process dataset chunk pages into output columns @@ -946,8 +960,14 @@ void reader::impl::decompress_and_decode() return meta; }); } + + printf("line %d\n", __LINE__); + fflush(stdout); + // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + printf("line %d\n", __LINE__); + fflush(stdout); auto decomp_data = decompress_stripe_data(stripe_chunk, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, @@ -959,8 +979,12 @@ void reader::impl::decompress_and_decode() _metadata.get_row_index_stride(), level == 0, _stream); - stripe_data.clear(); + // stripe_data.clear(); stripe_data.push_back(std::move(decomp_data)); + + printf("line %d\n", __LINE__); + fflush(stdout); + } else { if (row_groups.size().first) { chunks.host_to_device_async(_stream); @@ -978,6 +1002,9 @@ void reader::impl::decompress_and_decode() } } + printf("line %d\n", __LINE__); + fflush(stdout); + for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < num_stripes; ++j) { @@ -993,6 +1020,9 @@ void reader::impl::decompress_and_decode() _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); } + printf("line %d\n", __LINE__); + fflush(stdout); + decode_stream_data(num_dict_entries, rows_to_skip, _metadata.get_row_index_stride(), @@ -1004,6 +1034,9 @@ void reader::impl::decompress_and_decode() _stream, _mr); + printf("line %d\n", __LINE__); + fflush(stdout); + if (nested_cols.size()) { printf("have nested col\n"); @@ -1026,6 +1059,9 @@ void reader::impl::decompress_and_decode() if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } } + + printf("line %d\n", __LINE__); + fflush(stdout); } // 
end loop level } @@ -1118,8 +1154,8 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - printf("output col: \n"); - cudf::test::print(out_columns.front()->view()); + // printf("output col: \n"); + // cudf::test::print(out_columns.front()->view()); auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 5bddacf635e..2231125f5d8 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -128,7 +128,7 @@ struct OrcWriterTestStripes TEST_F(OrcWriterTestStripes, StripeSize) { - constexpr auto num_rows = 50; + constexpr auto num_rows = 1000000; // auto const [size_bytes, size_rows] = GetParam(); auto const seq_col = random_values(num_rows); @@ -139,16 +139,16 @@ TEST_F(OrcWriterTestStripes, StripeSize) std::vector> cols; cols.push_back(col.release()); - printf("input col: \n"); - cudf::test::print(cols.front()->view()); + // printf("input col: \n"); + // cudf::test::print(cols.front()->view()); auto const expected = std::make_unique
(std::move(cols)); auto validate = [&](std::vector const& orc_buffer) { - auto const expected_stripe_num = 1; - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); - EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); + // auto const expected_stripe_num = 6; + // auto const stats = cudf::io::read_parsed_orc_statistics( + // cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); + // EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder( @@ -156,14 +156,14 @@ TEST_F(OrcWriterTestStripes, StripeSize) .use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected->view(), result.tbl->view()); }; { std::vector out_buffer_chunked; cudf::io::chunked_orc_writer_options opts = cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked)) - .stripe_size_rows(1000); + .stripe_size_rows(10000); cudf::io::orc_chunked_writer(opts).write(expected->view()); validate(out_buffer_chunked); From 00aa10485420d5ea32db6805b50e54b345d06c82 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 24 Feb 2024 08:10:57 -0800 Subject: [PATCH 095/321] Debugging Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 4 + cpp/src/io/orc/reader_impl.cu | 50 +- cpp/src/io/orc/reader_impl_chunking.cu | 11 +- cpp/tests/io/orc_test.cpp | 2018 ++++++++++++++++++++- 4 files changed, 2015 insertions(+), 68 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 620294a1e47..18afdddd82a 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -200,6 +200,10 @@ aggregate_orc_metadata::select_stripes( // TODO: check for overflow here. 
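// [Aside] The TODO above concerns this accumulation: numberOfRows is an
// unsigned 64-bit count per stripe, so the running total can overflow. A
// minimal sketch of a checked add, assuming the total is kept in a
// non-negative int64_t (hypothetical helper, not part of this patch):
#include <cstdint>
#include <limits>
#include <stdexcept>

int64_t add_rows_checked(int64_t total, uint64_t stripe_rows)
{
  // Headroom left before the signed 64-bit maximum is reached.
  auto const headroom = static_cast<uint64_t>(std::numeric_limits<int64_t>::max() - total);
  if (stripe_rows > headroom) {
    throw std::overflow_error("selected stripes hold more rows than int64_t can represent");
  }
  return total + static_cast<int64_t>(stripe_rows);
}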
rows_to_read += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; + printf(" rows_to_read : %d / %d\n", + (int)per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows, + (int)rows_to_read); + printf(" stripe to read: %d-%d\n", (int)src_file_idx, (int)stripe_idx); } selected_stripes_mapping.emplace_back(static_cast(src_file_idx), std::move(stripe_infos)); diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 78f622bf2fe..f88b931bd2b 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -864,8 +864,8 @@ void reader::impl::decompress_and_decode() auto dst_base = static_cast(stripe_data[stripe_idx].data()); - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); auto const num_rows_per_stripe = stripe_info->numberOfRows; printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); @@ -877,8 +877,8 @@ void reader::impl::decompress_and_decode() _metadata.get_row_index_stride(); } - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); // Update chunks to reference streams pointers for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { @@ -927,8 +927,8 @@ void reader::impl::decompress_and_decode() } } - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); stripe_start_row += num_rows_per_stripe; num_rowgroups += stripe_num_rowgroups; @@ -936,8 +936,8 @@ void reader::impl::decompress_and_decode() // stripe_idx++; } // for (stripe : selected_stripes) - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); if (stripe_data.empty()) { continue; } @@ -961,13 +961,13 @@ void reader::impl::decompress_and_decode() }); } - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); auto decomp_data = decompress_stripe_data(stripe_chunk, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, @@ -982,8 +982,8 @@ void reader::impl::decompress_and_decode() // stripe_data.clear(); stripe_data.push_back(std::move(decomp_data)); - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); } else { if (row_groups.size().first) { @@ -1002,8 +1002,8 @@ void reader::impl::decompress_and_decode() } } - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; @@ -1020,8 +1020,8 @@ void reader::impl::decompress_and_decode() _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); } - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); decode_stream_data(num_dict_entries, rows_to_skip, @@ -1034,8 +1034,8 @@ void reader::impl::decompress_and_decode() _stream, _mr); - printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); if (nested_cols.size()) { printf("have nested col\n"); @@ -1060,8 +1060,8 @@ void reader::impl::decompress_and_decode() if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } } - 
printf("line %d\n", __LINE__); - fflush(stdout); + // printf("line %d\n", __LINE__); + // fflush(stdout); } // end loop level } @@ -1083,13 +1083,13 @@ void reader::impl::prepare_data(uint64_t skip_rows, // load_data(); while (_chunk_read_data.more_stripe_to_load()) { load_data(); - printf("done load data\n\n"); while (_chunk_read_data.more_stripe_to_decode()) { decompress_and_decode(); _file_itm_data.out_buffers.push_back(std::move(_out_buffers)); } } + printf("done load and decode data\n\n"); // decompress_and_decode(); // while (_chunk_read_data.more_stripe_to_decode()) { @@ -1154,8 +1154,8 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - // printf("output col: \n"); - // cudf::test::print(out_columns.front()->view()); + printf("output col: \n"); + cudf::test::print(out_columns.front()->view()); auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index aed03245f57..6b72ea28a96 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -351,6 +351,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Get the total number of stripes across all input files. auto const num_stripes = selected_stripes.size(); + printf("num load stripe: %d\n", (int)num_stripes); + stripe_data_read_chunks.resize(num_stripes); lvl_stripe_stream_chunks.resize(_selected_columns.num_levels()); @@ -460,6 +462,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // return; } + printf("total stripe sizes:\n"); + for (auto& size : total_stripe_sizes) { + printf("size: %ld, %zu\n", size.count, size.size_bytes); + } + // Compute the prefix sum of stripe data sizes. total_stripe_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), @@ -470,7 +477,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); - printf("total stripe sizes:\n"); + printf("prefix sum total stripe sizes:\n"); for (auto& size : total_stripe_sizes) { printf("size: %ld, %zu\n", size.count, size.size_bytes); } @@ -521,7 +528,7 @@ void reader::impl::load_data() auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; - printf("loading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); + printf("\n\nloading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); // Prepare the buffer to read raw data onto. // TODO: clear all old buffer. diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 2231125f5d8..bb132e477dd 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -95,41 +95,1282 @@ std::unique_ptr create_random_fixed_table(cudf::size_type num_colum return std::make_unique(std::move(columns)); } +// Base test fixture for tests +struct OrcWriterTest : public cudf::test::BaseFixture {}; + +// Typed test fixture for numeric type tests +template +struct OrcWriterNumericTypeTest : public OrcWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Typed test fixture for timestamp type tests +template +struct OrcWriterTimestampTypeTest : public OrcWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Declare typed test cases +// TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5351 +using SupportedTypes = cudf::test::Types; +TYPED_TEST_SUITE(OrcWriterNumericTypeTest, SupportedTypes); +using SupportedTimestampTypes = + cudf::test::RemoveIf>, + cudf::test::TimestampTypes>; +TYPED_TEST_SUITE(OrcWriterTimestampTypeTest, SupportedTimestampTypes); + +// Base test fixture for chunked writer tests +struct OrcChunkedWriterTest : public cudf::test::BaseFixture {}; + +// Typed test fixture for numeric type tests +template +struct OrcChunkedWriterNumericTypeTest : public OrcChunkedWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Declare typed test cases +TYPED_TEST_SUITE(OrcChunkedWriterNumericTypeTest, SupportedTypes); + +// Test fixture for reader tests +struct OrcReaderTest : public cudf::test::BaseFixture {}; + +// Test fixture for statistics tests +struct OrcStatisticsTest : public cudf::test::BaseFixture {}; + +// Test fixture for metadata tests +struct OrcMetadataReaderTest : public cudf::test::BaseFixture {}; + +struct OrcCompressionTest : public cudf::test::BaseFixture, + public ::testing::WithParamInterface {}; + namespace { // Generates a vector of uniform random values of type T template inline auto random_values(size_t size) { - std::vector values(size); + std::vector values(size); + + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; + + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + + return values; +} + +struct SkipRowTest { + int test_calls{0}; + SkipRowTest() {} + + std::unique_ptr
get_expected_result(std::string const& filepath, + int skip_rows, + int file_num_rows, + int read_num_rows) + { + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + column_wrapper input_col( + sequence, sequence + file_num_rows); + table_view input_table({input_col}); + + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input_table); + cudf::io::write_orc(out_opts); + + auto begin_sequence = sequence, end_sequence = sequence; + if (skip_rows < file_num_rows) { + begin_sequence += skip_rows; + end_sequence += std::min(skip_rows + read_num_rows, file_num_rows); + } + column_wrapper output_col(begin_sequence, + end_sequence); + std::vector> output_cols; + output_cols.push_back(output_col.release()); + return std::make_unique
(std::move(output_cols)); + } + + void test(int skip_rows, int file_num_rows, int read_num_rows) + { + auto filepath = + temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); + auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, read_num_rows); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .skip_rows(skip_rows) + .num_rows(read_num_rows); + auto result = cudf::io::read_orc(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_result->view(), result.tbl->view()); + } + + void test(int skip_rows, int file_num_rows) + { + auto filepath = + temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); + auto expected_result = + get_expected_result(filepath, skip_rows, file_num_rows, file_num_rows - skip_rows); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .skip_rows(skip_rows); + auto result = cudf::io::read_orc(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_result->view(), result.tbl->view()); + } +}; + +} // namespace + +TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) +{ + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + + constexpr auto num_rows = 100; + column_wrapper col(sequence, + sequence + num_rows); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) +{ + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcSingleColumnWithNulls.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (std::rand() / 10); }); + + constexpr auto num_rows = 100; + column_wrapper col(sequence, + sequence + num_rows); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcTimestamps.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .timestamp_type(this->type()); + auto result = 
cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (std::rand() / 10); }); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 60); }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcTimestampsWithNulls.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .timestamp_type(this->type()); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) +{ + constexpr int64_t max = std::numeric_limits::max(); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); + + constexpr auto num_rows = 100; + column_wrapper col(sequence, + sequence + num_rows); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .timestamp_type(this->type()); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TEST_F(OrcWriterTest, MultiColumn) +{ + constexpr auto num_rows = 10; + + auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{col6_vals[i], numeric::scale_type{12}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{col6_vals[i], numeric::scale_type{-12}}; + }); + + bool_col col0(col0_data.begin(), col0_data.end()); + int8_col col1(col1_data.begin(), col1_data.end()); + int16_col col2(col2_data.begin(), col2_data.end()); + int32_col col3(col3_data.begin(), col3_data.end()); + float32_col col4(col4_data.begin(), col4_data.end()); + float64_col col5(col5_data.begin(), col5_data.end()); + dec128_col col6(col6_data, col6_data + num_rows); + dec128_col col7(col7_data, col7_data + num_rows); + + list_col col8{ + {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}; + + int32_col child_col{48, 27, 25, 31, 351, 351, 29, 15, -1, -99}; + struct_col col9{child_col}; + + table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("bools"); + expected_metadata.column_metadata[1].set_name("int8s"); + 
expected_metadata.column_metadata[2].set_name("int16s"); + expected_metadata.column_metadata[3].set_name("int32s"); + expected_metadata.column_metadata[4].set_name("floats"); + expected_metadata.column_metadata[5].set_name("doubles"); + expected_metadata.column_metadata[6].set_name("decimal_pos_scale"); + expected_metadata.column_metadata[7].set_name("decimal_neg_scale"); + expected_metadata.column_metadata[8].set_name("lists"); + expected_metadata.column_metadata[9].set_name("structs"); + + auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcWriterTest, MultiColumnWithNulls) +{ + constexpr auto num_rows = 10; + + auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal64{col6_vals[i], numeric::scale_type{2}}; + }); + auto col0_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + auto col1_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 2); }); + auto col3_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); + auto col4_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 4 && i <= 6); }); + auto col5_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 8); }); + auto col6_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 3); }); + + bool_col col0{col0_data.begin(), col0_data.end(), col0_mask}; + int8_col col1{col1_data.begin(), col1_data.end(), col1_mask}; + int16_col col2(col2_data.begin(), col2_data.end()); + int32_col col3{col3_data.begin(), col3_data.end(), col3_mask}; + float32_col col4{col4_data.begin(), col4_data.end(), col4_mask}; + float64_col col5{col5_data.begin(), col5_data.end(), col5_mask}; + dec64_col col6{col6_data, col6_data + num_rows, col6_mask}; + list_col col7{ + {{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}, + col0_mask}; + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351, 29, 15, -1, -99}, {1, 0, 1, 1, 0, 1, 1, 1, 0, 1}}; + struct_col col8{{ages_col}, {0, 1, 1, 0, 1, 1, 0, 1, 1, 0}}; + table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("bools"); + expected_metadata.column_metadata[1].set_name("int8s"); + expected_metadata.column_metadata[2].set_name("int16s"); + expected_metadata.column_metadata[3].set_name("int32s"); + expected_metadata.column_metadata[4].set_name("floats"); + expected_metadata.column_metadata[5].set_name("doubles"); + 
expected_metadata.column_metadata[6].set_name("decimal"); + expected_metadata.column_metadata[7].set_name("lists"); + expected_metadata.column_metadata[8].set_name("structs"); + + auto filepath = temp_env->get_temp_filepath("OrcMultiColumnWithNulls.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcWriterTest, ReadZeroRows) +{ + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + + constexpr auto num_rows = 10; + column_wrapper col(sequence, + sequence + num_rows); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .num_rows(0); + auto result = cudf::io::read_orc(in_opts); + + EXPECT_EQ(0, result.tbl->num_rows()); + EXPECT_EQ(1, result.tbl->num_columns()); +} + +TEST_F(OrcWriterTest, Strings) +{ + std::vector strings{ + "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + auto const num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + + int32_col col0(seq_col0.begin(), seq_col0.end()); + str_col col1(strings.begin(), strings.end()); + float32_col col2(seq_col2.begin(), seq_col2.end()); + + table_view expected({col0, col1, col2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + + auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcWriterTest, SlicedTable) +{ + // This test checks for writing zero copy, offsetted views into existing cudf tables + + std::vector strings{ + "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + auto const num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto vals_col3 = random_values(num_rows); + auto seq_col3 = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal64{vals_col3[i], numeric::scale_type{2}}; + }); + + int32_col col0(seq_col0.begin(), seq_col0.end()); + str_col col1(strings.begin(), strings.end()); + float32_col col2(seq_col2.begin(), 
seq_col2.end()); + float32_col col3(seq_col3, seq_col3 + num_rows); + + list_col col4{ + {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; + + int16_col ages_col{{48, 27, 25, 31, 351, 351, 29, 15}, cudf::test::iterators::null_at(5)}; + struct_col col5{{ages_col}, cudf::test::iterators::null_at(4)}; + + table_view expected({col0, col1, col2, col3, col4, col5}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + expected_metadata.column_metadata[3].set_name("col_decimal"); + expected_metadata.column_metadata[4].set_name("lists"); + expected_metadata.column_metadata[5].set_name("structs"); + + auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows)}); + + auto filepath = temp_env->get_temp_filepath("SlicedTable.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcWriterTest, HostBuffer) +{ + constexpr auto num_rows = 100 << 10; + auto const seq_col = random_values(num_rows); + int32_col col(seq_col.begin(), seq_col.end()); + + table_view expected{{col}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + + std::vector out_buffer; + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())) + .use_index(false); + auto const result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcWriterTest, negTimestampsNano) +{ + // This is a separate test because ORC format has a bug where writing a timestamp between -1 and 0 + // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behavior and so + // this test has to hardcode test values which are < -1 second. 
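// [Aside] The quirk described above follows from ORC storing a timestamp as a
// seconds value plus a non-negative nanosecond remainder, so every instant in
// (-1, 0) seconds lands in the seconds == -1 bucket; readers that then nudge
// such values by one second produce the off-by-one readback this test mimics.
// A sketch of the decomposition (standalone illustration, not ORC/cudf code):
#include <cstdint>
#include <utility>

std::pair<int64_t, int64_t> to_seconds_and_nanos(int64_t epoch_ns)
{
  constexpr int64_t ns_per_s = 1'000'000'000;
  int64_t secs  = epoch_ns / ns_per_s;  // C++ division truncates toward zero
  int64_t nanos = epoch_ns % ns_per_s;
  if (nanos < 0) {  // shift the remainder into [0, ns_per_s)
    nanos += ns_per_s;
    --secs;
  }
  return {secs, nanos};  // e.g. -500'000'000 ns -> {-1, 500'000'000}
}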
+ // Details: https://github.com/rapidsai/cudf/pull/5529#issuecomment-648768925 + auto timestamps_ns = + cudf::test::fixed_width_column_wrapper{ + -131968727238000000, + -1530705634500000000, + -1674638741932929000, + }; + cudf::table_view expected({timestamps_ns}); + + auto filepath = temp_env->get_temp_filepath("OrcNegTimestamp.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TEST_F(OrcWriterTest, Slice) +{ + int32_col col{{1, 2, 3, 4, 5}, cudf::test::iterators::null_at(3)}; + std::vector indices{2, 5}; + std::vector result = cudf::slice(col, indices); + cudf::table_view tbl{result}; + + auto filepath = temp_env->get_temp_filepath("Slice.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); +} + +TEST_F(OrcChunkedWriterTest, SingleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedSingle.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *table1); +} + +TEST_F(OrcChunkedWriterTest, SimpleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); +} + +TEST_F(OrcChunkedWriterTest, LargeTables) +{ + srand(31337); + auto table1 = create_random_fixed_table(512, 4096, true); + auto table2 = create_random_fixed_table(512, 8192, true); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto 
result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); +} + +TEST_F(OrcChunkedWriterTest, ManyTables) +{ + srand(31337); + std::vector> tables; + std::vector table_views; + constexpr int num_tables = 96; + for (int idx = 0; idx < num_tables; idx++) { + auto tbl = create_random_fixed_table(16, 64, true); + table_views.push_back(*tbl); + tables.push_back(std::move(tbl)); + } + + auto expected = cudf::concatenate(table_views); + + auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); + std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { + writer.write(tbl); + }); + writer.close(); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); +} + +TEST_F(OrcChunkedWriterTest, Metadata) +{ + std::vector strings{ + "Monday", "Tuesday", "THURSDAY", "Wednesday", "Friday", "Sunday", "Saturday"}; + auto const num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + + int32_col col0(seq_col0.begin(), seq_col0.end()); + str_col col1{strings.begin(), strings.end()}; + float32_col col2(seq_col2.begin(), seq_col2.end()); + + table_view expected({col0, col1, col2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + + auto filepath = temp_env->get_temp_filepath("ChunkedMetadata.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}) + .metadata(expected_metadata); + cudf::io::orc_chunked_writer(opts).write(expected).write(expected); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcChunkedWriterTest, Strings) +{ + bool mask1[] = {true, true, false, true, true, true, true}; + std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; + str_col strings1(h_strings1.begin(), h_strings1.end(), mask1); + table_view tbl1({strings1}); + + bool mask2[] = {false, true, true, true, true, true, true}; + std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; + str_col strings2(h_strings2.begin(), h_strings2.end(), mask2); + table_view tbl2({strings2}); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); +} + +TEST_F(OrcChunkedWriterTest, MismatchedTypes) +{ + srand(31337); + auto table1 = 
create_random_fixed_table(4, 4, true); + auto table2 = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); +} + +TEST_F(OrcChunkedWriterTest, ChunkedWritingAfterClosing) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedWritingAfterClosing.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); + writer.write(*table1); + writer.close(); + EXPECT_THROW(writer.write(*table1), cudf::logic_error); +} + +TEST_F(OrcChunkedWriterTest, MismatchedStructure) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + auto table2 = create_random_fixed_table(3, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); +} + +TEST_F(OrcChunkedWriterTest, ReadStripes) +{ + srand(31337); + auto table1 = create_random_fixed_table(1, 5, true); + auto table2 = create_random_fixed_table(1, 6, true); + + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); + + printf("tab 1: \n"); + cudf::test::print(table1->get_column(0).view()); + + printf("tab 2: \n"); + cudf::test::print(table2->get_column(0).view()); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); +} + +TEST_F(OrcChunkedWriterTest, ReadStripesError) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedStripesError.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{0, 1}}); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); + read_opts.set_stripes({{-1}}); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); +} + +TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) +{ + // write out two 31 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + int num_els = 31; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true}; + + T c1a[num_els]; + std::fill(c1a, c1a + num_els, 
static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + table_view tbl1({c1a_w, c1b_w}); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + table_view tbl2({c2a_w, c2b_w}); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); +} + +TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) +{ + // write out two 33 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + int num_els = 33; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true}; + + T c1a[num_els]; + std::fill(c1a, c1a + num_els, static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + table_view tbl1({c1a_w, c1b_w}); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + table_view tbl2({c2a_w, c2b_w}); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); +} + +TEST_F(OrcReaderTest, CombinedSkipRowTest) +{ + SkipRowTest skip_row; + skip_row.test(50, 75); + skip_row.test(2, 100); + skip_row.test(2, 100, 50); + skip_row.test(2, 100, 98); + skip_row.test(2, 100, 99); + skip_row.test(2, 100, 100); + skip_row.test(2, 100, 110); +} + +TEST_F(OrcStatisticsTest, Basic) +{ + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + auto ts_sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i - 4) * 1000002; }); + auto dec_sequence = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return i * 1001; }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + + std::vector strings{ + "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Wednesday", "Tuesday"}; + int num_rows = strings.size(); + + column_wrapper col1( + sequence, sequence + 
num_rows, validity); + column_wrapper col2( + sequence, sequence + num_rows, validity); + str_col col3{strings.begin(), strings.end()}; + column_wrapper col4( + ts_sequence, ts_sequence + num_rows, validity); + column_wrapper col5( + ts_sequence, ts_sequence + num_rows, validity); + bool_col col6({true, true, true, true, true, false, false, false, false}, validity); + + cudf::test::fixed_point_column_wrapper col7( + dec_sequence, dec_sequence + num_rows, numeric::scale_type{-1}); + + table_view expected({col1, col2, col3, col4, col5, col6, col7}); + + auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc"); + + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); + + auto expected_column_names = std::vector{""}; + std::generate_n( + std::back_inserter(expected_column_names), + expected.num_columns(), + [starting_index = 0]() mutable { return "_col" + std::to_string(starting_index++); }); + EXPECT_EQ(stats.column_names, expected_column_names); + + auto validate_statistics = [&](std::vector const& stats) { + ASSERT_EQ(stats.size(), expected.num_columns() + 1); + auto& s0 = stats[0]; + EXPECT_EQ(*s0.number_of_values, 9ul); + EXPECT_TRUE(s0.has_null.has_value()); + EXPECT_FALSE(*s0.has_null); + + auto& s1 = stats[1]; + EXPECT_EQ(*s1.number_of_values, 4ul); + EXPECT_TRUE(*s1.has_null); + auto& ts1 = std::get(s1.type_specific_stats); + EXPECT_EQ(*ts1.minimum, 1); + EXPECT_EQ(*ts1.maximum, 7); + EXPECT_EQ(*ts1.sum, 16); + + auto& s2 = stats[2]; + EXPECT_EQ(*s2.number_of_values, 4ul); + EXPECT_TRUE(*s2.has_null); + auto& ts2 = std::get(s2.type_specific_stats); + EXPECT_EQ(*ts2.minimum, 1.); + EXPECT_EQ(*ts2.maximum, 7.); + EXPECT_EQ(*ts2.sum, 16.); + + auto& s3 = stats[3]; + EXPECT_EQ(*s3.number_of_values, 9ul); + EXPECT_FALSE(*s3.has_null); + auto& ts3 = std::get(s3.type_specific_stats); + EXPECT_EQ(*ts3.minimum, "Friday"); + EXPECT_EQ(*ts3.maximum, "Wednesday"); + EXPECT_EQ(*ts3.sum, 58ul); + + auto& s4 = stats[4]; + EXPECT_EQ(*s4.number_of_values, 4ul); + EXPECT_TRUE(*s4.has_null); + auto& ts4 = std::get(s4.type_specific_stats); + EXPECT_EQ(*ts4.minimum, -4); + EXPECT_EQ(*ts4.maximum, 3); + EXPECT_EQ(*ts4.minimum_utc, -4); + EXPECT_EQ(*ts4.maximum_utc, 3); + EXPECT_EQ(*ts4.minimum_nanos, 999994); + EXPECT_EQ(*ts4.maximum_nanos, 6); + + auto& s5 = stats[5]; + EXPECT_EQ(*s5.number_of_values, 4ul); + EXPECT_TRUE(*s5.has_null); + auto& ts5 = std::get(s5.type_specific_stats); + EXPECT_EQ(*ts5.minimum, -3001); + EXPECT_EQ(*ts5.maximum, 3000); + EXPECT_EQ(*ts5.minimum_utc, -3001); + EXPECT_EQ(*ts5.maximum_utc, 3000); + EXPECT_EQ(*ts5.minimum_nanos, 994000); + EXPECT_EQ(*ts5.maximum_nanos, 6000); + + auto& s6 = stats[6]; + EXPECT_EQ(*s6.number_of_values, 4ul); + EXPECT_TRUE(*s6.has_null); + auto& ts6 = std::get(s6.type_specific_stats); + EXPECT_EQ(ts6.count[0], 2); + + auto& s7 = stats[7]; + EXPECT_EQ(*s7.number_of_values, 9ul); + EXPECT_FALSE(*s7.has_null); + auto& ts7 = std::get(s7.type_specific_stats); + EXPECT_EQ(*ts7.minimum, "0.0"); + EXPECT_EQ(*ts7.maximum, "800.8"); + EXPECT_EQ(*ts7.sum, "3603.6"); + }; + + validate_statistics(stats.file_stats); + // There's only one stripe, so column stats are the same as stripe stats + validate_statistics(stats.stripes_stats[0]); +} + +TEST_F(OrcWriterTest, SlicedValidMask) +{ + std::vector strings; + // Need more than 32 elements to reproduce the issue + 
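// [Aside] The 32 above is the width of a cudf validity-bitmask word: null bits
// are packed into 32-bit words, so a slice offset of 31 places the first row
// at the last bit of a word, the alignment this test targets. The index
// arithmetic, as a standalone sketch (illustrative, not cudf internals):
#include <cstdint>

constexpr int32_t bits_per_word = 32;
constexpr int32_t word_index(int32_t row, int32_t offset) { return (row + offset) / bits_per_word; }
constexpr int32_t bit_in_word(int32_t row, int32_t offset) { return (row + offset) % bits_per_word; }
// With offset 31: row 0 -> word 0, bit 31; row 1 already starts word 1.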
for (int i = 0; i < 34; ++i) + strings.emplace_back("a long string to make sure overflow affects the output"); + // An element is null only to enforce the output column to be nullable + str_col col{strings.begin(), strings.end(), cudf::test::iterators::null_at(32)}; + + // Bug tested here is easiest to reproduce when column_offset % 32 is 31 + std::vector indices{31, 34}; + auto sliced_col = cudf::slice(static_cast(col), indices); + cudf::table_view tbl{sliced_col}; + + cudf::io::table_input_metadata expected_metadata(tbl); + expected_metadata.column_metadata[0].set_name("col_string"); + + auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(tbl, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcReaderTest, SingleInputs) +{ + srand(31533); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); + cudf::io::orc_writer_options write_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(write_opts); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *table1); +} + +TEST_F(OrcReaderTest, zstdCompressionRegression) +{ + if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + GTEST_SKIP() << "Newer nvCOMP version is required"; + } + + // Test with zstd compressed orc file with high compression ratio. 
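// [Aside] A fixture like input_buffer below can be regenerated by writing a
// highly repetitive column as ZSTD-compressed ORC into a host buffer and
// dumping the bytes as hex. A sketch using the writer API seen elsewhere in
// this file (names are illustrative; this is not the code that produced the
// buffer):
std::vector<char> fixture_buf;
auto const fixture_vals = std::vector<int32_t>(1'920'800, 0);  // compresses extremely well
cudf::test::fixed_width_column_wrapper<int32_t> fixture_col(fixture_vals.begin(),
                                                            fixture_vals.end());
cudf::io::orc_writer_options fixture_opts =
  cudf::io::orc_writer_options::builder(cudf::io::sink_info(&fixture_buf),
                                        cudf::table_view({fixture_col}))
    .compression(cudf::io::compression_type::ZSTD);
cudf::io::write_orc(fixture_opts);
for (unsigned char c : fixture_buf) printf("0x%02x, ", c);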
+ constexpr uint8_t input_buffer[] = { + 0x4f, 0x52, 0x43, 0x5a, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, 0xa4, 0x34, 0xc7, 0x03, 0x00, 0x74, + 0x00, 0x00, 0x18, 0x41, 0xff, 0xaa, 0x02, 0x00, 0xbb, 0xff, 0x45, 0xc8, 0x01, 0x25, 0x30, 0x04, + 0x65, 0x00, 0x00, 0x10, 0xaa, 0x1f, 0x02, 0x00, 0x01, 0x29, 0x0b, 0xc7, 0x39, 0xb8, 0x02, 0xcb, + 0xaf, 0x38, 0xc0, 0x07, 0x00, 0x00, 0x40, 0x01, 0xc0, 0x05, 0x00, 0x00, 0x46, 0x4d, 0x45, 0x00, + 0x00, 0x0a, 0x06, 0x08, 0x01, 0x10, 0x01, 0x18, 0x30, 0x0a, 0x06, 0x08, 0x02, 0x10, 0x01, 0x18, + 0x06, 0x0a, 0x06, 0x08, 0x03, 0x10, 0x01, 0x18, 0x05, 0x12, 0x02, 0x08, 0x00, 0x12, 0x04, 0x08, + 0x03, 0x10, 0x02, 0x59, 0x00, 0x00, 0x08, 0x03, 0x10, 0x63, 0x1a, 0x0c, 0x08, 0x03, 0x10, 0x00, + 0x18, 0x3b, 0x20, 0x25, 0x28, 0xa0, 0x9e, 0x75, 0x22, 0x10, 0x08, 0x0c, 0x12, 0x01, 0x01, 0x1a, + 0x09, 0x63, 0x64, 0x5f, 0x67, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x22, 0x02, 0x08, 0x07, 0x30, 0xa0, + 0x9e, 0x75, 0x08, 0x2f, 0x10, 0x05, 0x18, 0x80, 0x80, 0x10, 0x22, 0x02, 0x00, 0x0c, 0x28, 0x00, + 0x30, 0x09, 0x82, 0xf4, 0x03, 0x03, 0x4f, 0x52, 0x43, 0x17}; + + auto source = + cudf::io::source_info(reinterpret_cast(input_buffer), sizeof(input_buffer)); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(source).use_index(false); + + cudf::io::table_with_metadata result; + CUDF_EXPECT_NO_THROW(result = cudf::io::read_orc(in_opts)); + EXPECT_EQ(1920800, result.tbl->num_rows()); +} + +TEST_F(OrcReaderTest, MultipleInputs) +{ + srand(31537); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); - using T1 = T; - using uniform_distribution = - typename std::conditional_t, - std::bernoulli_distribution, - std::conditional_t, - std::uniform_real_distribution, - std::uniform_int_distribution>>; + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - static constexpr auto seed = 0xf00d; - static std::mt19937 engine{seed}; - static uniform_distribution dist{}; - std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + auto const filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); + { + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(out_opts); + } - return values; + auto const filepath2 = temp_env->get_temp_filepath("SimpleTable2.orc"); + { + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath2}, table2->view()); + cudf::io::write_orc(out_opts); + } + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}); + auto result = cudf::io::read_orc(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); } -} // namespace -// Base test fixture for tests -struct OrcWriterTest : public cudf::test::BaseFixture {}; +struct OrcWriterTestDecimal : public OrcWriterTest, + public ::testing::WithParamInterface> {}; + +TEST_P(OrcWriterTestDecimal, Decimal64) +{ + auto const [num_rows, scale] = GetParam(); + + // Using int16_t because scale causes values to overflow if they already require 32 bits + auto const vals = random_values(num_rows); + auto data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal64{vals[i], numeric::scale_type{scale}}; + }); + auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 7 == 0; }); + dec64_col col{data, data + num_rows, 
mask}; + cudf::table_view tbl({static_cast(col)}); + + auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(tbl.column(0), result.tbl->view().column(0)); +} + +INSTANTIATE_TEST_CASE_P(OrcWriterTest, + OrcWriterTestDecimal, + ::testing::Combine(::testing::Values(1, 10000, 10001, 34567), + ::testing::Values(-2, 0, 2))); + +TEST_F(OrcWriterTest, Decimal32) +{ + constexpr auto num_rows = 12000; + + // Using int16_t because scale causes values to overflow if they already require 32 bits + auto const vals = random_values(num_rows); + auto data = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { + return numeric::decimal32{vals[i], numeric::scale_type{2}}; + }); + auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 13; }); + dec32_col col{data, data + num_rows, mask}; + cudf::table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("Decimal32.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, result.tbl->view().column(0)); +} + +TEST_F(OrcStatisticsTest, Overflow) +{ + int num_rows = 10; + auto too_large_seq = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i * (std::numeric_limits::max() / 20); }); + auto too_small_seq = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i * (std::numeric_limits::min() / 20); }); + auto not_too_large_seq = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i * (std::numeric_limits::max() / 200); }); + auto not_too_small_seq = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i * (std::numeric_limits::min() / 200); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + + column_wrapper col1( + too_large_seq, too_large_seq + num_rows, validity); + column_wrapper col2( + too_small_seq, too_small_seq + num_rows, validity); + column_wrapper col3( + not_too_large_seq, not_too_large_seq + num_rows, validity); + column_wrapper col4( + not_too_small_seq, not_too_small_seq + num_rows, validity); + table_view tbl({col1, col2, col3, col4}); + + auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc"); + + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); + + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); + + auto check_sum_exist = [&](int idx, bool expected) { + auto const& s = stats.file_stats[idx]; + auto const& ts = std::get(s.type_specific_stats); + EXPECT_EQ(ts.sum.has_value(), expected); + }; + check_sum_exist(1, false); + check_sum_exist(2, false); + check_sum_exist(3, true); + check_sum_exist(4, true); +} + +TEST_F(OrcStatisticsTest, HasNull) +{ + // This test can now be implemented with libcudf; keeping the pandas version to keep the test + 
// inputs diversified + // Method to create file: + // >>> import pandas as pd + // >>> df = pd.DataFrame({'a':pd.Series([1, 2, None], dtype="Int64"), 'b':[3, 4, 5]}) + // >>> df.to_orc("temp.orc") + // + // Contents of file: + // >>> import pyarrow.orc as po + // >>> po.ORCFile('temp.orc').read() + // pyarrow.Table + // a: int64 + // b: int64 + // ---- + // a: [[1,2,null]] + // b: [[3,4,5]] + auto nulls_orc = std::array{ + 0x4F, 0x52, 0x43, 0x1D, 0x00, 0x00, 0x0A, 0x0C, 0x0A, 0x04, 0x00, 0x00, 0x00, 0x00, 0x12, 0x04, + 0x08, 0x03, 0x50, 0x00, 0x2C, 0x00, 0x00, 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, + 0x0E, 0x26, 0x21, 0x36, 0x0E, 0x26, 0x01, 0x16, 0x09, 0xB6, 0x00, 0x46, 0x00, 0x2C, 0x00, 0x00, + 0xE3, 0x12, 0xE7, 0x62, 0x67, 0x80, 0x00, 0x21, 0x1E, 0x0E, 0x66, 0x21, 0x36, 0x0E, 0x36, 0x01, + 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x05, 0x00, 0x00, 0xFF, 0xC0, + 0x07, 0x00, 0x00, 0x46, 0x01, 0x24, 0x05, 0x00, 0x00, 0xFF, 0xE0, 0x09, 0x00, 0x00, 0x46, 0x02, + 0x68, 0xA0, 0x68, 0x00, 0x00, 0xE3, 0x62, 0xE3, 0x60, 0x13, 0x60, 0x90, 0x10, 0xE4, 0x02, 0xD1, + 0x8C, 0x12, 0x92, 0x60, 0x9A, 0x09, 0x4C, 0x33, 0x00, 0xC5, 0x59, 0xC1, 0x34, 0x23, 0x98, 0x66, + 0x04, 0xD2, 0x6C, 0x60, 0x3E, 0x13, 0x94, 0xCF, 0x24, 0xC1, 0x2E, 0xC4, 0x02, 0x52, 0x07, 0x24, + 0x99, 0x60, 0xA4, 0x14, 0x73, 0x68, 0x88, 0x33, 0x00, 0x46, 0x00, 0x00, 0xE3, 0x52, 0xE2, 0x62, + 0xE1, 0x60, 0x0E, 0x60, 0xE0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, 0x60, + 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x00, 0xB4, + 0x00, 0x00, 0xE3, 0x60, 0x16, 0x98, 0xC6, 0x28, 0xC5, 0xC5, 0xC1, 0x2C, 0xE0, 0x2C, 0x21, 0xA3, + 0x60, 0xAE, 0xC1, 0xAC, 0x24, 0xC4, 0xC1, 0x23, 0xC4, 0xC4, 0xC8, 0x24, 0xC5, 0x98, 0x28, 0xC5, + 0x98, 0xA4, 0xC0, 0xA0, 0xC1, 0x60, 0xC0, 0xA0, 0xC4, 0xC1, 0xC1, 0x82, 0xCE, 0x32, 0x60, 0xB6, + 0x62, 0xE1, 0x60, 0x0E, 0x60, 0xB0, 0xE2, 0xE1, 0x60, 0x12, 0x62, 0xE3, 0x60, 0x12, 0x60, 0x91, + 0x60, 0x0B, 0x60, 0x04, 0xF2, 0x98, 0x81, 0x3C, 0x36, 0x01, 0x2E, 0x09, 0x89, 0x00, 0x06, 0x87, + 0x09, 0x7E, 0x1E, 0x8C, 0x49, 0xAC, 0x86, 0x7A, 0xE6, 0x7A, 0xA6, 0x00, 0x08, 0x5D, 0x10, 0x01, + 0x18, 0x80, 0x80, 0x04, 0x22, 0x02, 0x00, 0x0C, 0x28, 0x26, 0x30, 0x06, 0x82, 0xF4, 0x03, 0x03, + 0x4F, 0x52, 0x43, 0x17, + }; + + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); + + EXPECT_EQ(stats.file_stats[1].has_null, true); + EXPECT_EQ(stats.file_stats[2].has_null, false); + + EXPECT_EQ(stats.stripes_stats[0][1].has_null, true); + EXPECT_EQ(stats.stripes_stats[0][2].has_null, false); +} struct OrcWriterTestStripes : public OrcWriterTest, public ::testing::WithParamInterface> {}; -TEST_F(OrcWriterTestStripes, StripeSize) +TEST_P(OrcWriterTestStripes, StripeSize) { - constexpr auto num_rows = 1000000; - // auto const [size_bytes, size_rows] = GetParam(); + constexpr auto num_rows = 1000000; + auto const [size_bytes, size_rows] = GetParam(); auto const seq_col = random_values(num_rows); auto const validity = @@ -138,17 +1379,14 @@ TEST_F(OrcWriterTestStripes, StripeSize) std::vector> cols; cols.push_back(col.release()); - - // printf("input col: \n"); - // cudf::test::print(cols.front()->view()); - auto const expected = std::make_unique
(std::move(cols)); auto validate = [&](std::vector const& orc_buffer) { - // auto const expected_stripe_num = 6; - // auto const stats = cudf::io::read_parsed_orc_statistics( - // cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); - // EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); + auto const expected_stripe_num = + std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); + EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder( @@ -163,23 +1401,721 @@ TEST_F(OrcWriterTestStripes, StripeSize) std::vector out_buffer_chunked; cudf::io::chunked_orc_writer_options opts = cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked)) - .stripe_size_rows(10000); + .stripe_size_rows(size_rows) + .stripe_size_bytes(size_bytes); cudf::io::orc_chunked_writer(opts).write(expected->view()); - validate(out_buffer_chunked); } + { + std::vector out_buffer; + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected->view()) + .stripe_size_rows(size_rows) + .stripe_size_bytes(size_bytes); + cudf::io::write_orc(out_opts); + validate(out_buffer); + } +} + +INSTANTIATE_TEST_CASE_P(OrcWriterTest, + OrcWriterTestStripes, + ::testing::Values(std::make_tuple(800000ul, 1000000), + std::make_tuple(2000000ul, 1000000), + std::make_tuple(4000000ul, 1000000), + std::make_tuple(8000000ul, 1000000), + std::make_tuple(8000000ul, 500000), + std::make_tuple(8000000ul, 250000), + std::make_tuple(8000000ul, 100000))); + +TEST_F(OrcWriterTest, StripeSizeInvalid) +{ + auto const unused_table = std::make_unique
(); + std::vector out_buffer; + + EXPECT_THROW( + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) + .stripe_size_rows(511), + cudf::logic_error); + EXPECT_THROW( + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) + .stripe_size_bytes(63 << 10), + cudf::logic_error); + EXPECT_THROW( + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) + .row_index_stride(511), + cudf::logic_error); +} + +TEST_F(OrcWriterTest, TestMap) +{ + auto const num_rows = 1200000; + auto const lists_per_row = 4; + auto const num_child_rows = (num_rows * lists_per_row) / 2; // half due to validity + + auto keys = random_values(num_child_rows); + auto vals = random_values(num_child_rows); + auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + int32_col keys_col(keys.begin(), keys.end()); + float32_col vals_col{vals.begin(), vals.end(), vals_mask}; + auto s_col = struct_col({keys_col, vals_col}).release(); + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + + std::vector row_offsets(num_rows + 1); + int offset = 0; + for (int idx = 0; idx < (num_rows) + 1; ++idx) { + row_offsets[idx] = offset; + if (valids[idx]) { offset += lists_per_row; } + } + int32_col offsets(row_offsets.begin(), row_offsets.end()); + + auto num_list_rows = static_cast(offsets).size() - 1; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_list_rows); + auto list_col = cudf::make_lists_column( + num_list_rows, offsets.release(), std::move(s_col), null_count, std::move(null_mask)); + + table_view expected({*list_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_list_column_as_map(); + + auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(expected_metadata); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(OrcReaderTest, NestedColumnSelection) +{ + auto const num_rows = 1000; + auto child_col1_data = random_values(num_rows); + auto child_col2_data = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + int32_col child_col1{child_col1_data.begin(), child_col1_data.end(), validity}; + int64_col child_col2{child_col2_data.begin(), child_col2_data.end(), validity}; + struct_col s_col{child_col1, child_col2}; + table_view expected({s_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("struct_s"); + expected_metadata.column_metadata[0].child(0).set_name("field_a"); + expected_metadata.column_metadata[0].child(1).set_name("field_b"); + + auto filepath = temp_env->get_temp_filepath("OrcNestedSelection.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options 
in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .columns({"struct_s.field_b"}); + auto result = cudf::io::read_orc(in_opts); + + // Verify that only one child column is included in the output table + ASSERT_EQ(1, result.tbl->view().column(0).num_children()); + // Verify that the first child column is `field_b` + int64_col expected_col{child_col2_data.begin(), child_col2_data.end(), validity}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_col, result.tbl->view().column(0).child(0)); + ASSERT_EQ("field_b", result.metadata.schema_info[0].children[0].name); +} + +TEST_F(OrcReaderTest, DecimalOptions) +{ + constexpr auto num_rows = 10; + auto col_vals = random_values(num_rows); + auto col_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{col_vals[i], numeric::scale_type{2}}; + }); + auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 == 0; }); + + dec128_col col{col_data, col_data + num_rows, mask}; + table_view expected({col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("dec"); + + auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options valid_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .decimal128_columns({"dec", "fake_name"}); + // Should not throw, even with "fake name" + EXPECT_NO_THROW(cudf::io::read_orc(valid_opts)); +} + +TEST_F(OrcWriterTest, DecimalOptionsNested) +{ + auto const num_rows = 100; + + auto dec_vals = random_values(num_rows); + auto dec1_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal64{dec_vals[i], numeric::scale_type{2}}; + }); + auto dec2_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{dec_vals[i], numeric::scale_type{2}}; + }); + dec64_col dec1_col(dec1_data, dec1_data + num_rows); + dec128_col dec2_col(dec2_data, dec2_data + num_rows); + auto child_struct_col = cudf::test::structs_column_wrapper{dec1_col, dec2_col}; + + auto int_vals = random_values(num_rows); + int32_col int_col(int_vals.begin(), int_vals.end()); + auto map_struct_col = struct_col({child_struct_col, int_col}).release(); + + std::vector row_offsets(num_rows + 1); + std::iota(row_offsets.begin(), row_offsets.end(), 0); + int32_col offsets(row_offsets.begin(), row_offsets.end()); + + auto map_list_col = cudf::make_lists_column( + num_rows, offsets.release(), std::move(map_struct_col), 0, rmm::device_buffer{}); + + table_view expected({*map_list_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("maps"); + expected_metadata.column_metadata[0].set_list_column_as_map(); + expected_metadata.column_metadata[0].child(1).child(0).child(0).set_name("dec64"); + expected_metadata.column_metadata[0].child(1).child(0).child(1).set_name("dec128"); + + auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + 
cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + // One less level of nesting because children of map columns are the child struct's children + .decimal128_columns({"maps.0.dec64"}); + auto result = cudf::io::read_orc(in_opts); + + // Both columns should be read as decimal128 + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result.tbl->view().column(0).child(1).child(0).child(0), + result.tbl->view().column(0).child(1).child(0).child(1)); +} + +TEST_F(OrcReaderTest, EmptyColumnsParam) +{ + srand(31337); + auto const expected = create_random_fixed_table(2, 4, false); + + std::vector out_buffer; + cudf::io::orc_writer_options args = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_orc(args); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) + .columns({}); + auto const result = cudf::io::read_orc(read_opts); + + EXPECT_EQ(result.tbl->num_columns(), 0); + EXPECT_EQ(result.tbl->num_rows(), 0); +} + +TEST_F(OrcMetadataReaderTest, TestBasic) +{ + auto const num_rows = 1'200'000; + + auto ints = random_values(num_rows); + auto floats = random_values(num_rows); + int32_col int_col(ints.begin(), ints.end()); + float32_col float_col(floats.begin(), floats.end()); + + table_view expected({int_col, float_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("int_col"); + expected_metadata.column_metadata[1].set_name("float_col"); + + auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_orc(out_opts); + + auto meta = read_orc_metadata(cudf::io::source_info{filepath}); + EXPECT_EQ(meta.num_rows(), num_rows); + + EXPECT_EQ(meta.schema().root().name(), ""); + EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT); + ASSERT_EQ(meta.schema().root().num_children(), 2); + + EXPECT_EQ(meta.schema().root().child(0).name(), "int_col"); + EXPECT_EQ(meta.schema().root().child(1).name(), "float_col"); +} + +TEST_F(OrcMetadataReaderTest, TestNested) +{ + auto const num_rows = 1'200'000; + auto const lists_per_row = 4; + auto const num_child_rows = num_rows * lists_per_row; + + auto keys = random_values(num_child_rows); + auto vals = random_values(num_child_rows); + int32_col keys_col(keys.begin(), keys.end()); + float32_col vals_col(vals.begin(), vals.end()); + auto s_col = struct_col({keys_col, vals_col}).release(); + + std::vector row_offsets(num_rows + 1); + for (int idx = 0; idx < num_rows + 1; ++idx) { + row_offsets[idx] = idx * lists_per_row; + } + int32_col offsets(row_offsets.begin(), row_offsets.end()); + + auto list_col = + cudf::make_lists_column(num_rows, offsets.release(), std::move(s_col), 0, rmm::device_buffer{}); + + table_view expected({*list_col, *list_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("maps"); + expected_metadata.column_metadata[0].set_list_column_as_map(); + expected_metadata.column_metadata[1].set_name("lists"); + expected_metadata.column_metadata[1].child(1).child(0).set_name("int_field"); + expected_metadata.column_metadata[1].child(1).child(1).set_name("float_field"); + + auto filepath = temp_env->get_temp_filepath("MetadataTest.orc"); + 
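+ // A list-of-struct flagged via set_list_column_as_map should come back as an ORC + // MAP with unnamed key/value children, while the plain list stays a LIST; both + // shapes are asserted on the metadata read back below.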
cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + cudf::io::write_orc(out_opts); + + auto meta = read_orc_metadata(cudf::io::source_info{filepath}); + EXPECT_EQ(meta.num_rows(), num_rows); + + EXPECT_EQ(meta.schema().root().name(), ""); + EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT); + ASSERT_EQ(meta.schema().root().num_children(), 2); + + auto const& out_map_col = meta.schema().root().child(0); + EXPECT_EQ(out_map_col.name(), "maps"); + EXPECT_EQ(out_map_col.type_kind(), cudf::io::orc::MAP); + ASSERT_EQ(out_map_col.num_children(), 2); + EXPECT_EQ(out_map_col.child(0).name(), ""); // keys (no name in ORC) + EXPECT_EQ(out_map_col.child(1).name(), ""); // values (no name in ORC) + + auto const& out_list_col = meta.schema().root().child(1); + EXPECT_EQ(out_list_col.name(), "lists"); + EXPECT_EQ(out_list_col.type_kind(), cudf::io::orc::LIST); + ASSERT_EQ(out_list_col.num_children(), 1); + + auto const& out_list_struct_col = out_list_col.child(0); + EXPECT_EQ(out_list_struct_col.name(), ""); // elements (no name in ORC) + EXPECT_EQ(out_list_struct_col.type_kind(), cudf::io::orc::STRUCT); + ASSERT_EQ(out_list_struct_col.num_children(), 2); + + auto const& out_int_col = out_list_struct_col.child(0); + EXPECT_EQ(out_int_col.name(), "int_field"); + EXPECT_EQ(out_int_col.type_kind(), cudf::io::orc::INT); + + auto const& out_float_col = out_list_struct_col.child(1); + EXPECT_EQ(out_float_col.name(), "float_field"); + EXPECT_EQ(out_float_col.type_kind(), cudf::io::orc::FLOAT); +} + +TEST_F(OrcReaderTest, ZstdMaxCompressionRate) +{ + if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD) or + cudf::io::nvcomp::is_compression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + GTEST_SKIP() << "Newer nvCOMP version is required"; + } + + // Encodes as 64KB of zeros, which compresses to 18 bytes with ZSTD + std::vector const h_data(8 * 1024); + float32_col col(h_data.begin(), h_data.end()); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcHugeCompRatio.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::ZSTD); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TEST_F(OrcWriterTest, CompStats) +{ + auto table = create_random_fixed_table(1, 100000, true); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::orc_writer_options opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}, table->view()) + .compression_statistics(stats); + cudf::io::write_orc(opts); + + EXPECT_NE(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_FALSE(std::isnan(stats->compression_ratio())); +} + +TEST_F(OrcChunkedWriterTest, CompStats) +{ + auto table = create_random_fixed_table(1, 100000, true); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}) + .compression_statistics(stats); + 
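+ // The stats object is shared with the writer and accumulates across write() calls: + // the second write below must exactly double num_compressed_bytes while leaving + // the compression ratio unchanged.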
cudf::io::orc_chunked_writer(opts).write(*table); + + EXPECT_NE(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_FALSE(std::isnan(stats->compression_ratio())); + + auto const single_table_comp_stats = *stats; + cudf::io::orc_chunked_writer(opts).write(*table); + + EXPECT_EQ(stats->compression_ratio(), single_table_comp_stats.compression_ratio()); + EXPECT_EQ(stats->num_compressed_bytes(), 2 * single_table_comp_stats.num_compressed_bytes()); + + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); +} + +void expect_compression_stats_empty(std::shared_ptr stats) +{ + EXPECT_EQ(stats->num_compressed_bytes(), 0); + EXPECT_EQ(stats->num_failed_bytes(), 0); + EXPECT_EQ(stats->num_skipped_bytes(), 0); + EXPECT_TRUE(std::isnan(stats->compression_ratio())); +} + +TEST_F(OrcWriterTest, CompStatsEmptyTable) +{ + auto table_no_rows = create_random_fixed_table(20, 0, false); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder( + cudf::io::sink_info{&unused_buffer}, table_no_rows->view()) + .compression_statistics(stats); + cudf::io::write_orc(opts); + + expect_compression_stats_empty(stats); +} + +TEST_F(OrcChunkedWriterTest, CompStatsEmptyTable) +{ + auto table_no_rows = create_random_fixed_table(20, 0, false); + + auto const stats = std::make_shared(); + + std::vector unused_buffer; + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{&unused_buffer}) + .compression_statistics(stats); + cudf::io::orc_chunked_writer(opts).write(*table_no_rows); + + expect_compression_stats_empty(stats); +} + +TEST_F(OrcWriterTest, EmptyRowGroup) +{ + std::vector ints(10000 + 5, -1); + auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i >= 10000; }); + int32_col col{ints.begin(), ints.end(), mask}; + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcEmptyRowGroup.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +TEST_F(OrcWriterTest, NoNullsAsNonNullable) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{{1, 2, 3}, valids}; + table_view expected({col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_nullability(false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(std::move(expected_metadata)); + // Writer should be able to write a column without nulls as non-nullable + EXPECT_NO_THROW(cudf::io::write_orc(out_opts)); +} + +TEST_F(OrcWriterTest, SlicedStringColumn) +{ + std::vector strings{"a", "bc", "def", "longer", "strings", "at the end"}; + str_col col(strings.begin(), strings.end()); + table_view expected({col}); + + // Slice the table to include the longer strings + auto expected_slice = cudf::slice(expected, {2, 6}); + + auto filepath = 
temp_env->get_temp_filepath("SlicedTable.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); +} + +TEST_F(OrcWriterTest, EmptyChildStringColumn) +{ + list_col col{{}, {}}; + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcEmptyChildStringColumn.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); +} + +template +void check_all_null_stats(cudf::io::column_statistics const& stats) +{ + EXPECT_EQ(stats.number_of_values, 0); + EXPECT_TRUE(stats.has_null); + + auto const ts = std::get(stats.type_specific_stats); + EXPECT_FALSE(ts.minimum.has_value()); + EXPECT_FALSE(ts.maximum.has_value()); + EXPECT_TRUE(ts.sum.has_value()); + EXPECT_EQ(*ts.sum, 0); +} + +TEST_F(OrcStatisticsTest, AllNulls) +{ + float64_col double_col({0., 0., 0.}, cudf::test::iterators::all_nulls()); + int32_col int_col({0, 0, 0}, cudf::test::iterators::all_nulls()); + str_col string_col({"", "", ""}, cudf::test::iterators::all_nulls()); + + cudf::table_view expected({int_col, double_col, string_col}); + + std::vector out_buffer; + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected); + cudf::io::write_orc(out_opts); + + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}); + + check_all_null_stats(stats.file_stats[1]); + check_all_null_stats(stats.file_stats[2]); + check_all_null_stats(stats.file_stats[3]); +} + +TEST_F(OrcWriterTest, UnorderedDictionary) +{ + std::vector strings{ + "BBBB", "BBBB", "CCCC", "BBBB", "CCCC", "EEEE", "CCCC", "AAAA", "DDDD", "EEEE"}; + str_col col(strings.begin(), strings.end()); + + table_view expected({col}); + + std::vector out_buffer_sorted; + cudf::io::orc_writer_options out_opts_sorted = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer_sorted}, expected); + cudf::io::write_orc(out_opts_sorted); + + cudf::io::orc_reader_options in_opts_sorted = cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer_sorted.data(), out_buffer_sorted.size()}); + auto const from_sorted = cudf::io::read_orc(in_opts_sorted).tbl; + + std::vector out_buffer_unsorted; + cudf::io::orc_writer_options out_opts_unsorted = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer_unsorted}, expected) + .enable_dictionary_sort(false); + cudf::io::write_orc(out_opts_unsorted); + + cudf::io::orc_reader_options in_opts_unsorted = cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer_unsorted.data(), out_buffer_unsorted.size()}); + auto const from_unsorted = cudf::io::read_orc(in_opts_unsorted).tbl; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*from_sorted, *from_unsorted); +} + +TEST_F(OrcStatisticsTest, Empty) +{ + int32_col col0{}; + float64_col col1{}; + str_col col2{}; + dec64_col 
col3{}; + column_wrapper col4; + bool_col col5{}; + table_view expected({col0, col1, col2, col3, col4, col5}); + + std::vector out_buffer; + + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected); + cudf::io::write_orc(out_opts); + + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}); + + auto expected_column_names = std::vector{""}; + std::generate_n( + std::back_inserter(expected_column_names), + expected.num_columns(), + [starting_index = 0]() mutable { return "_col" + std::to_string(starting_index++); }); + EXPECT_EQ(stats.column_names, expected_column_names); + + EXPECT_EQ(stats.column_names.size(), 7); + EXPECT_EQ(stats.stripes_stats.size(), 0); + + auto const& fstats = stats.file_stats; + ASSERT_EQ(fstats.size(), 7); + auto& s0 = fstats[0]; + EXPECT_TRUE(s0.number_of_values.has_value()); + EXPECT_EQ(*s0.number_of_values, 0ul); + EXPECT_TRUE(s0.has_null.has_value()); + EXPECT_FALSE(*s0.has_null); + + auto& s1 = fstats[1]; + EXPECT_EQ(*s1.number_of_values, 0ul); + EXPECT_FALSE(*s1.has_null); + auto& ts1 = std::get(s1.type_specific_stats); + EXPECT_FALSE(ts1.minimum.has_value()); + EXPECT_FALSE(ts1.maximum.has_value()); + EXPECT_TRUE(ts1.sum.has_value()); + EXPECT_EQ(*ts1.sum, 0); + + auto& s2 = fstats[2]; + EXPECT_EQ(*s2.number_of_values, 0ul); + EXPECT_FALSE(*s2.has_null); + auto& ts2 = std::get(s2.type_specific_stats); + EXPECT_FALSE(ts2.minimum.has_value()); + EXPECT_FALSE(ts2.maximum.has_value()); + EXPECT_TRUE(ts2.sum.has_value()); + EXPECT_EQ(*ts2.sum, 0); + + auto& s3 = fstats[3]; + EXPECT_EQ(*s3.number_of_values, 0ul); + EXPECT_FALSE(*s3.has_null); + auto& ts3 = std::get(s3.type_specific_stats); + EXPECT_FALSE(ts3.minimum.has_value()); + EXPECT_FALSE(ts3.maximum.has_value()); + EXPECT_TRUE(ts3.sum.has_value()); + EXPECT_EQ(*ts3.sum, 0); + + auto& s4 = fstats[4]; + EXPECT_EQ(*s4.number_of_values, 0ul); + EXPECT_FALSE(*s4.has_null); + auto& ts4 = std::get(s4.type_specific_stats); + EXPECT_FALSE(ts4.minimum.has_value()); + EXPECT_FALSE(ts4.maximum.has_value()); + EXPECT_TRUE(ts4.sum.has_value()); + EXPECT_EQ(*ts4.sum, "0"); + + auto& s5 = fstats[5]; + EXPECT_EQ(*s5.number_of_values, 0ul); + EXPECT_FALSE(*s5.has_null); + auto& ts5 = std::get(s5.type_specific_stats); + EXPECT_FALSE(ts5.minimum.has_value()); + EXPECT_FALSE(ts5.maximum.has_value()); + EXPECT_FALSE(ts5.minimum_utc.has_value()); + EXPECT_FALSE(ts5.maximum_utc.has_value()); + EXPECT_FALSE(ts5.minimum_nanos.has_value()); + EXPECT_FALSE(ts5.maximum_nanos.has_value()); + + auto& s6 = fstats[6]; + EXPECT_EQ(*s6.number_of_values, 0ul); + EXPECT_FALSE(*s6.has_null); + auto& ts6 = std::get(s6.type_specific_stats); + EXPECT_EQ(ts6.count[0], 0); +} + +TEST_P(OrcCompressionTest, Basic) +{ + constexpr auto num_rows = 12000; + auto const compression_type = GetParam(); + + // Generate compressible data + auto int_sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); + auto float_sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 32; }); + + int32_col int_col(int_sequence, int_sequence + num_rows); + float32_col float_col(float_sequence, float_sequence + num_rows); + + table_view expected({int_col, float_col}); + + std::vector out_buffer; + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, expected) + .compression(compression_type); + 
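+ // The same write/read round-trip must hold for every parameterized codec + // (NONE, SNAPPY, LZ4, ZSTD; see the INSTANTIATE list below).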
cudf::io::write_orc(out_opts); + + cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); } -// INSTANTIATE_TEST_CASE_P(OrcWriterTest, -// OrcWriterTestStripes, -// ::testing::Values(std::make_tuple(800000ul, 1000000))); +INSTANTIATE_TEST_CASE_P(OrcCompressionTest, + OrcCompressionTest, + ::testing::Values(cudf::io::compression_type::NONE, + cudf::io::compression_type::SNAPPY, + cudf::io::compression_type::LZ4, + cudf::io::compression_type::ZSTD)); + +TEST_F(OrcWriterTest, BounceBufferBug) +{ + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 100; }); + + constexpr auto num_rows = 150000; + column_wrapper col(sequence, + sequence + num_rows); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("BounceBufferBug.orc"); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::ZSTD); + cudf::io::write_orc(out_opts); +} -// INSTANTIATE_TEST_CASE_P(OrcWriterTest, -// OrcWriterTestStripes, -// ::testing::Values(std::make_tuple(800000ul, 1000000), -// std::make_tuple(2000000ul, 1000000), -// std::make_tuple(4000000ul, 1000000), -// std::make_tuple(8000000ul, 1000000), -// std::make_tuple(8000000ul, 500000), -// std::make_tuple(8000000ul, 250000), -// std::make_tuple(8000000ul, 100000))); +CUDF_TEST_PROGRAM_MAIN() From a38b11510c3477a0fc0a891c48ef3b416234a594 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 24 Feb 2024 08:31:59 -0800 Subject: [PATCH 096/321] Fix stripe lookup bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index f88b931bd2b..86e863a70e9 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -722,7 +722,9 @@ void reader::impl::decompress_and_decode() auto const rows_to_skip = 0; auto rows_to_read = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - rows_to_read += _metadata.per_file_metadata[0].ff.stripes[stripe_idx].numberOfRows; + auto const& stripe = selected_stripes[stripe_idx]; + auto const stripe_info = stripe.stripe_info; + rows_to_read += stripe_info->numberOfRows; } // Set up table for converting timestamp columns from local to UTC time From 75cec9b70ebe11cef3395e57fcf4f547c5cd685d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 24 Feb 2024 13:18:03 -0800 Subject: [PATCH 097/321] Fix a bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 16 +++++++++++----- cpp/src/io/orc/reader_impl_chunking.cu | 5 +++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 86e863a70e9..90e2f1b63c2 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -715,17 +715,23 @@ void reader::impl::decompress_and_decode() printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); - // auto const rows_to_skip = _file_itm_data.rows_to_skip; + auto const rows_to_skip = _file_itm_data.rows_to_skip; // auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; - auto const rows_to_skip = 0; - auto rows_to_read = 
0; + // auto const rows_to_skip = 0; + auto rows_to_read = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; rows_to_read += stripe_info->numberOfRows; + + if (_file_itm_data.rows_to_skip > 0) { + CUDF_EXPECTS(_file_itm_data.rows_to_skip < stripe_info->numberOfRows, "TODO"); + } } + rows_to_read -= rows_to_skip; + _file_itm_data.rows_to_skip = 0; // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { @@ -1156,8 +1162,8 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - printf("output col: \n"); - cudf::test::print(out_columns.front()->view()); + // printf("output col: \n"); + // cudf::test::print(out_columns.front()->view()); auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 6b72ea28a96..d8e722f75e0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -332,6 +332,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); if (_file_itm_data.has_no_data()) { return; } + printf("input skip rows: %d, num rows: %d\n", (int)skip_rows, (int)num_rows_opt.value_or(-1)); + printf("actual skip rows: %d, num rows: %d\n", + (int)_file_itm_data.rows_to_skip, + (int)_file_itm_data.rows_to_read); + // auto const rows_to_skip = _file_itm_data.rows_to_skip; // auto const rows_to_read = _file_itm_data.rows_to_read; auto const& selected_stripes = _file_itm_data.selected_stripes; From a7bd47a06b369b1fd16b2f8bceca6782cd16248a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 24 Feb 2024 13:35:00 -0800 Subject: [PATCH 098/321] Fix another bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 90e2f1b63c2..fe653d74aa8 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -730,7 +730,7 @@ void reader::impl::decompress_and_decode() CUDF_EXPECTS(_file_itm_data.rows_to_skip < stripe_info->numberOfRows, "TODO"); } } - rows_to_read -= rows_to_skip; + rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); _file_itm_data.rows_to_skip = 0; // Set up table for converting timestamp columns from local to UTC time From db768fbee5b1190281c79b685d5c69ad851a5225 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 24 Feb 2024 19:36:57 -0800 Subject: [PATCH 099/321] Debugging Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 2 +- cpp/src/io/orc/reader_impl.cu | 41 +++++++++--- cpp/src/io/orc/reader_impl_chunking.cu | 7 ++ cpp/tests/io/orc_test.cpp | 91 ++++++++++++++++++-------- 4 files changed, 106 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 61f4681a3f4..d24ee6f9225 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -703,7 +703,7 @@ class orc_writer_options { */ void set_stripe_size_rows(size_type size_rows) { - CUDF_EXPECTS(size_rows >= 512, "Maximum stripe size cannot be smaller than 512"); + // CUDF_EXPECTS(size_rows >= 512, "Maximum stripe size cannot be smaller than 512"); _stripe_size_rows = size_rows; } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index fe653d74aa8..2147bd066a7 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -537,6 +537,8 @@ void decode_stream_data(std::size_t num_dicts, [&](auto null_count, auto const stripe_idx) { printf( "null count: %d => %d\n", (int)stripe_idx, (int)chunks[stripe_idx][col_idx].null_count); + printf("num child rows: %d \n", (int)chunks[stripe_idx][col_idx].num_child_rows); + return null_count + chunks[stripe_idx][col_idx].null_count; }); }); @@ -768,6 +770,8 @@ void reader::impl::decompress_and_decode() // compared to parent column. 
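+ // (e.g. for a list-of-struct input, level 0 decodes the list offsets and level 1 + // decodes the struct/int children, whose row counts are derived from the parent's + // chunks)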
auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + printf("processing level = %d\n", (int)level); + auto& columns_level = _selected_columns.levels[level]; // TODO: do it in global step @@ -893,12 +897,17 @@ void reader::impl::decompress_and_decode() auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting - chunk.start_row = (level == 0) - ? stripe_start_row - : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; - chunk.num_rows = (level == 0) - ? stripe_info->numberOfRows - : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.start_row = (level == 0) + ? stripe_start_row + : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; + chunk.num_rows = (level == 0) + ? stripe_info->numberOfRows + : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + printf("col idx: %d, start_row: %d, num rows: %d\n", + (int)col_idx, + (int)chunk.start_row, + (int)chunk.num_rows); + chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; chunk.parent_validity_info = (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; @@ -909,6 +918,9 @@ void reader::impl::decompress_and_decode() chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; chunk.type_kind = _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; + + printf("type: %d\n", (int)chunk.type_kind); + // num_child_rows for a struct column will be same, for other nested types it will be // calculated. chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; @@ -931,6 +943,16 @@ void reader::impl::decompress_and_decode() if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stripe_start].dst_pos; + if (chunk.strm_len[k]) { + auto& info = stream_info[chunk.strm_id[k] + stripe_start]; + printf("stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_col_idx, + (int)info.id.kind); + + printf("stream %d: %p\n", (int)k, chunk.streams[k]); + } } } } @@ -1017,6 +1039,7 @@ void reader::impl::decompress_and_decode() bool is_nullable = false; for (std::size_t j = 0; j < num_stripes; ++j) { if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { + printf(" is nullable\n"); is_nullable = true; break; } @@ -1162,8 +1185,10 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - // printf("output col: \n"); - // cudf::test::print(out_columns.front()->view()); + printf("output col0: \n"); + cudf::test::print(out_columns.front()->view()); + printf("output col1: \n"); + cudf::test::print(out_columns.back()->view()); auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d8e722f75e0..92e4b859388 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -138,6 +138,13 @@ std::size_t gather_stream_info_and_column_desc( } (*stream_idx)++; } else { // not chunks.has_value() + printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", + (int)stripe_index, + (int)level, + (int)column_id, + (int)stream.kind); + ; + stream_info.value()->emplace_back( stripeinfo->offset + src_offset, dst_offset, diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index bb132e477dd..d10e2c54fae 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -150,18 +150,9 @@ inline auto random_values(size_t size) { std::vector values(size); - using T1 = T; - using uniform_distribution = - typename std::conditional_t, - std::bernoulli_distribution, - std::conditional_t, - std::uniform_real_distribution, - std::uniform_int_distribution>>; - - static constexpr auto seed = 0xf00d; - static std::mt19937 engine{seed}; - static uniform_distribution dist{}; - std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + for (size_t i = 0; i < size; ++i) { + values[i] = i; + } return values; } @@ -1448,49 +1439,97 @@ TEST_F(OrcWriterTest, StripeSizeInvalid) TEST_F(OrcWriterTest, TestMap) { - auto const num_rows = 1200000; - auto const lists_per_row = 4; + auto const num_rows = 15; + auto const lists_per_row = 2; auto const num_child_rows = (num_rows * lists_per_row) / 2; // half due to validity - auto keys = random_values(num_child_rows); - auto vals = random_values(num_child_rows); - auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto keys = random_values(num_child_rows); + // auto vals = random_values(num_child_rows); + // auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; + // }); int32_col keys_col(keys.begin(), keys.end()); - float32_col vals_col{vals.begin(), vals.end(), vals_mask}; - auto s_col = struct_col({keys_col, vals_col}).release(); + int32_col keys_col2(keys.begin(), keys.end()); + // float32_col vals_col(vals.begin(), vals.end()); + auto s_col = struct_col({{keys_col}}).release(); - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + // auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); std::vector row_offsets(num_rows + 1); int offset = 0; for (int idx = 0; idx < (num_rows) + 1; ++idx) { row_offsets[idx] = offset; - if (valids[idx]) { offset += lists_per_row; } + // if (valids[idx]) { + offset += lists_per_row; + // } } int32_col offsets(row_offsets.begin(), row_offsets.end()); - auto num_list_rows = static_cast(offsets).size() - 1; - auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_list_rows); - auto list_col = cudf::make_lists_column( - num_list_rows, offsets.release(), std::move(s_col), null_count, std::move(null_mask)); + printf("line %d\n", __LINE__); + fflush(stdout); + +#if 0 + auto num_list_rows = static_cast(offsets).size() - 1; + // auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + + // num_list_rows); + auto list_col = + cudf::make_lists_column(num_list_rows, offsets.release(), std::move(s_col), 0, {}); + + printf("line %d\n", __LINE__); + fflush(stdout); + ; 
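+ // (this #if 0 block parks the original list-of-struct input while the simplified + // struct/int columns below are used to debug the map path)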
table_view expected({*list_col}); + printf("input:\n"); + cudf::test::print(*list_col); +#endif + table_view expected({*s_col, keys_col2}); + + printf("input0:\n"); + cudf::test::print(*s_col); + printf("input1:\n"); + cudf::test::print(keys_col2); + + printf("line %d\n", __LINE__); + fflush(stdout); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_list_column_as_map(); + printf("line %d\n", __LINE__); + fflush(stdout); + ; + auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); cudf::io::orc_writer_options out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata); + .metadata(expected_metadata) + .stripe_size_rows(10); cudf::io::write_orc(out_opts); + printf("line %d\n", __LINE__); + fflush(stdout); + cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); + printf("line %d\n", __LINE__); + fflush(stdout); + + printf("output:\n"); + cudf::test::print(result.tbl->get_column(0)); + ; + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + + printf("line %d\n", __LINE__); + fflush(stdout); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + + printf("line %d\n", __LINE__); + fflush(stdout); } TEST_F(OrcReaderTest, NestedColumnSelection) From f8652d7915186a62629ad6c3129f5c19c38cd2fe Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 25 Feb 2024 13:22:10 -0800 Subject: [PATCH 100/321] All tests pass Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 2 +- cpp/src/io/orc/reader_impl.cu | 99 +++++++++++++++++++++++++++++++---- cpp/tests/io/orc_test.cpp | 76 +++++---------------------- 3 files changed, 105 insertions(+), 72 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index d24ee6f9225..61f4681a3f4 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -703,7 +703,7 @@ class orc_writer_options { */ void set_stripe_size_rows(size_type size_rows) { - // CUDF_EXPECTS(size_rows >= 512, "Maximum stripe size cannot be smaller than 512"); + CUDF_EXPECTS(size_rows >= 512, "Maximum stripe size cannot be smaller than 512"); _stripe_size_rows = size_rows; } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 2147bd066a7..d657d95a4ef 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -591,7 +591,8 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& /** * @brief Aggregate child metadata from parent column chunks. */ -void aggregate_child_meta(std::size_t level, +void aggregate_child_meta(std::size_t stripe_start, + std::size_t level, cudf::io::orc::detail::column_hierarchy const& selected_columns, cudf::detail::host_2dspan chunks, cudf::detail::host_2dspan row_groups, @@ -624,15 +625,22 @@ void aggregate_child_meta(std::size_t level, int index = 0; // number of child column processed + printf("\n\n"); // For each parent column, update its child column meta for each stripe. 
std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { + printf("p_col.id: %d\n", (int)p_col.id); + auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; + printf(" level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx); + auto start_row = 0; auto processed_row_groups = 0; for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { // Aggregate num_rows and start_row from processed parent columns per row groups if (num_of_rowgroups) { + printf(" num_of_rowgroups: %d\n", (int)num_of_rowgroups); + auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; auto processed_child_rows = 0; @@ -650,16 +658,24 @@ void aggregate_child_meta(std::size_t level, // Aggregate start row, number of rows per chunk and total number of rows in a column auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; + printf(" stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows); + printf(" p_col.num_children: %d\n", (int)p_col.num_children); + for (size_type id = 0; id < p_col.num_children; id++) { auto const child_col_idx = index + id; // TODO: Check for overflow here. num_child_rows[child_col_idx] += child_rows; - num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; + num_child_rows_per_stripe[stripe_id + stripe_start][child_col_idx] = child_rows; // start row could be different for each column when there is nesting at each stripe level - child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; + child_start_row[stripe_id + stripe_start][child_col_idx] = (stripe_id == 0) ? 0 : start_row; + printf("update child_start_row (%d, %d): %d\n", + (int)stripe_id, + (int)child_col_idx, + (int)start_row); } start_row += child_rows; + printf(" start_row: %d\n", (int)start_row); } // Parent column null mask and null count would be required for child column @@ -769,6 +785,62 @@ void reader::impl::decompress_and_decode() // Iterates through levels of nested columns, child column will be one level down // compared to parent column. 
auto& col_meta = *_col_meta; + + printf("num_child_rows: (size %d)\n", (int)_col_meta->num_child_rows.size()); + if (_col_meta->num_child_rows.size()) { + for (auto x : _col_meta->num_child_rows) { + printf("%d, ", (int)x); + } + printf("\n"); + + _col_meta->num_child_rows.clear(); + } + + printf("parent_column_data null count: (size %d)\n", (int)_col_meta->parent_column_data.size()); + if (_col_meta->parent_column_data.size()) { + for (auto x : _col_meta->parent_column_data) { + printf("%d, ", (int)x.null_count); + } + printf("\n"); + _col_meta->parent_column_data.clear(); + } + + printf("parent_column_index: (size %d)\n", (int)_col_meta->parent_column_index.size()); + if (_col_meta->parent_column_index.size()) { + for (auto x : _col_meta->parent_column_index) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->parent_column_index.clear(); + } + + printf("child_start_row: (size %d)\n", (int)_col_meta->child_start_row.size()); + if (_col_meta->child_start_row.size()) { + for (auto x : _col_meta->child_start_row) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->child_start_row.clear(); + } + + printf("num_child_rows_per_stripe: (size %d)\n", + (int)_col_meta->num_child_rows_per_stripe.size()); + if (_col_meta->num_child_rows_per_stripe.size()) { + for (auto x : _col_meta->num_child_rows_per_stripe) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->num_child_rows_per_stripe.clear(); + } + + printf("rwgrp_meta: (size %d)\n", (int)_col_meta->rwgrp_meta.size()); + if (_col_meta->rwgrp_meta.size()) { + for (auto x : _col_meta->rwgrp_meta) { + printf("(%d | %d), ", (int)x.start_row, (int)x.num_rows); + } + printf("\n"); + } + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { printf("processing level = %d\n", (int)level); @@ -1046,6 +1118,9 @@ void reader::impl::decompress_and_decode() } auto is_list_type = (column_types[i].id() == type_id::LIST); auto n_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[i]; + + printf(" create child col, num rows: %d\n", (int)n_rows); + // For list column, offset column will be always size + 1 if (is_list_type) n_rows++; _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); @@ -1075,8 +1150,14 @@ void reader::impl::decompress_and_decode() scan_null_counts(chunks, null_count_prefix_sums[level], _stream); row_groups.device_to_host_sync(_stream); - aggregate_child_meta( - level, _selected_columns, chunks, row_groups, nested_cols, _out_buffers[level], col_meta); + aggregate_child_meta(stripe_start, + level, + _selected_columns, + chunks, + row_groups, + nested_cols, + _out_buffers[level], + col_meta); // ORC stores number of elements at each row, so we need to generate offsets from that std::vector buff_data; @@ -1185,10 +1266,10 @@ table_with_metadata reader::impl::make_output_chunk() col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); }); - printf("output col0: \n"); - cudf::test::print(out_columns.front()->view()); - printf("output col1: \n"); - cudf::test::print(out_columns.back()->view()); + // printf("output col0: \n"); + // cudf::test::print(out_columns.front()->view()); + // printf("output col1: \n"); + // cudf::test::print(out_columns.back()->view()); auto tbl = std::make_unique
(std::move(out_columns)); tabs.push_back(std::move(tbl)); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index d10e2c54fae..80dc3ab6fdb 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1439,97 +1439,49 @@ TEST_F(OrcWriterTest, StripeSizeInvalid) TEST_F(OrcWriterTest, TestMap) { - auto const num_rows = 15; - auto const lists_per_row = 2; + auto const num_rows = 1200000; + auto const lists_per_row = 4; auto const num_child_rows = (num_rows * lists_per_row) / 2; // half due to validity - auto keys = random_values(num_child_rows); - // auto vals = random_values(num_child_rows); - // auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; - // }); + auto keys = random_values(num_child_rows); + auto vals = random_values(num_child_rows); + auto vals_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); int32_col keys_col(keys.begin(), keys.end()); - int32_col keys_col2(keys.begin(), keys.end()); - // float32_col vals_col(vals.begin(), vals.end()); - auto s_col = struct_col({{keys_col}}).release(); + float32_col vals_col{vals.begin(), vals.end(), vals_mask}; + auto s_col = struct_col({keys_col, vals_col}).release(); - // auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); std::vector row_offsets(num_rows + 1); int offset = 0; for (int idx = 0; idx < (num_rows) + 1; ++idx) { row_offsets[idx] = offset; - // if (valids[idx]) { - offset += lists_per_row; - // } + if (valids[idx]) { offset += lists_per_row; } } int32_col offsets(row_offsets.begin(), row_offsets.end()); - printf("line %d\n", __LINE__); - fflush(stdout); - -#if 0 - auto num_list_rows = static_cast(offsets).size() - 1; - // auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + - // num_list_rows); - auto list_col = - cudf::make_lists_column(num_list_rows, offsets.release(), std::move(s_col), 0, {}); - - printf("line %d\n", __LINE__); - fflush(stdout); - ; + auto num_list_rows = static_cast(offsets).size() - 1; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_list_rows); + auto list_col = cudf::make_lists_column( + num_list_rows, offsets.release(), std::move(s_col), null_count, std::move(null_mask)); table_view expected({*list_col}); - printf("input:\n"); - cudf::test::print(*list_col); -#endif - table_view expected({*s_col, keys_col2}); - - printf("input0:\n"); - cudf::test::print(*s_col); - printf("input1:\n"); - cudf::test::print(keys_col2); - - printf("line %d\n", __LINE__); - fflush(stdout); - cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_list_column_as_map(); - printf("line %d\n", __LINE__); - fflush(stdout); - ; - auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); cudf::io::orc_writer_options out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(expected_metadata) - .stripe_size_rows(10); + .metadata(expected_metadata); cudf::io::write_orc(out_opts); - printf("line %d\n", __LINE__); - fflush(stdout); - cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - printf("line %d\n", __LINE__); - fflush(stdout); - - printf("output:\n"); - cudf::test::print(result.tbl->get_column(0)); - ; - 
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); - - printf("line %d\n", __LINE__); - fflush(stdout); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - - printf("line %d\n", __LINE__); - fflush(stdout); } TEST_F(OrcReaderTest, NestedColumnSelection) From 537ea0cd841b3fbcf2c215d4f37fa9c24168206c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 25 Feb 2024 14:00:46 -0800 Subject: [PATCH 101/321] Reverse tests Signed-off-by: Nghia Truong --- cpp/tests/io/orc_test.cpp | 90 ++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 80dc3ab6fdb..0b34b39f739 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -150,9 +149,18 @@ inline auto random_values(size_t size) { std::vector values(size); - for (size_t i = 0; i < size; ++i) { - values[i] = i; - } + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; + + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); return values; } @@ -198,7 +206,7 @@ struct SkipRowTest { .skip_rows(skip_rows) .num_rows(read_num_rows); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_result->view(), result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } void test(int skip_rows, int file_num_rows) @@ -212,7 +220,7 @@ struct SkipRowTest { .use_index(false) .skip_rows(skip_rows); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_result->view(), result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } }; @@ -236,7 +244,7 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) @@ -258,7 +266,7 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) @@ -282,7 +290,7 @@ TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) .timestamp_type(this->type()); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) @@ -308,7 +316,7 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) .timestamp_type(this->type()); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) @@ -332,7 +340,7 @@ 
TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) .timestamp_type(this->type()); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TEST_F(OrcWriterTest, MultiColumn) @@ -392,7 +400,7 @@ TEST_F(OrcWriterTest, MultiColumn) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -459,7 +467,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -517,7 +525,7 @@ TEST_F(OrcWriterTest, Strings) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -569,7 +577,7 @@ TEST_F(OrcWriterTest, SlicedTable) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -596,7 +604,7 @@ TEST_F(OrcWriterTest, HostBuffer) .use_index(false); auto const result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -626,7 +634,7 @@ TEST_F(OrcWriterTest, negTimestampsNano) CUDF_TEST_EXPECT_COLUMNS_EQUAL( expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TEST_F(OrcWriterTest, Slice) @@ -662,7 +670,7 @@ TEST_F(OrcChunkedWriterTest, SingleTable) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *table1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } TEST_F(OrcChunkedWriterTest, SimpleTable) @@ -682,7 +690,7 @@ TEST_F(OrcChunkedWriterTest, SimpleTable) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } TEST_F(OrcChunkedWriterTest, LargeTables) @@ -702,7 +710,7 @@ TEST_F(OrcChunkedWriterTest, LargeTables) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } 
TEST_F(OrcChunkedWriterTest, ManyTables) @@ -732,7 +740,7 @@ TEST_F(OrcChunkedWriterTest, ManyTables) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } TEST_F(OrcChunkedWriterTest, Metadata) @@ -791,7 +799,7 @@ TEST_F(OrcChunkedWriterTest, Strings) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } TEST_F(OrcChunkedWriterTest, MismatchedTypes) @@ -839,8 +847,8 @@ TEST_F(OrcChunkedWriterTest, MismatchedStructure) TEST_F(OrcChunkedWriterTest, ReadStripes) { srand(31337); - auto table1 = create_random_fixed_table(1, 5, true); - auto table2 = create_random_fixed_table(1, 6, true); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); @@ -849,17 +857,11 @@ TEST_F(OrcChunkedWriterTest, ReadStripes) cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - printf("tab 1: \n"); - cudf::test::print(table1->get_column(0).view()); - - printf("tab 2: \n"); - cudf::test::print(table2->get_column(0).view()); - cudf::io::orc_reader_options read_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } TEST_F(OrcChunkedWriterTest, ReadStripesError) @@ -919,7 +921,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) @@ -962,7 +964,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *expected); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } TEST_F(OrcReaderTest, CombinedSkipRowTest) @@ -1121,7 +1123,7 @@ TEST_F(OrcWriterTest, SlicedValidMask) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(tbl, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -1139,7 +1141,7 @@ TEST_F(OrcReaderTest, SingleInputs) cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); auto result = cudf::io::read_orc(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *table1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } TEST_F(OrcReaderTest, zstdCompressionRegression) @@ -1198,7 +1200,7 @@ TEST_F(OrcReaderTest, MultipleInputs) cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}); auto result = cudf::io::read_orc(read_opts); - 
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } struct OrcWriterTestDecimal : public OrcWriterTest, @@ -1385,7 +1387,7 @@ TEST_P(OrcWriterTestStripes, StripeSize) .use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected->view(), result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); }; { @@ -1480,7 +1482,7 @@ TEST_F(OrcWriterTest, TestMap) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -1743,7 +1745,7 @@ TEST_F(OrcReaderTest, ZstdMaxCompressionRate) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TEST_F(OrcWriterTest, CompStats) @@ -1844,7 +1846,7 @@ TEST_F(OrcWriterTest, EmptyRowGroup) cudf::io::orc_reader_options in_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } TEST_F(OrcWriterTest, NoNullsAsNonNullable) @@ -1882,7 +1884,7 @@ TEST_F(OrcWriterTest, SlicedStringColumn) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_slice, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); } TEST_F(OrcWriterTest, EmptyChildStringColumn) @@ -1899,7 +1901,7 @@ TEST_F(OrcWriterTest, EmptyChildStringColumn) cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } template @@ -1963,7 +1965,7 @@ TEST_F(OrcWriterTest, UnorderedDictionary) cudf::io::source_info{out_buffer_unsorted.data(), out_buffer_unsorted.size()}); auto const from_unsorted = cudf::io::read_orc(in_opts_unsorted).tbl; - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*from_sorted, *from_unsorted); + CUDF_TEST_EXPECT_TABLES_EQUAL(*from_sorted, *from_unsorted); } TEST_F(OrcStatisticsTest, Empty) @@ -2083,7 +2085,7 @@ TEST_P(OrcCompressionTest, Basic) cudf::io::source_info{out_buffer.data(), out_buffer.size()}); auto result = cudf::io::read_orc(in_opts); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } INSTANTIATE_TEST_CASE_P(OrcCompressionTest, From 24e15523d76ced09709bb1ad0a484e8756d4e390 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 25 Feb 2024 14:06:48 -0800 Subject: [PATCH 102/321] Fix for temp concatenation Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d657d95a4ef..74939b4e628 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1289,7 +1289,26 @@ 
table_with_metadata reader::impl::make_output_chunk()
   // todo: remove this
   // auto out_table = std::make_unique<table>
(std::move(out_columns)); auto out_table = [&] { - if (tv.size() > 1) { return cudf::concatenate(tv); } + if (tv.size() > 1) { + auto tmp = cudf::concatenate(tv); + std::vector has_mask(tmp->num_columns(), false); + std::vector has_nulls(tmp->num_columns(), false); + + for (int i = 0; i < tmp->num_columns(); ++i) { + for (int j = 0; j < (int)tv.size(); ++j) { + if (tv[j].column(i).nullable()) { has_mask[i] = true; } + if (tv[j].column(i).null_count()) { has_nulls[i] = true; } + } + } + for (int i = 0; i < tmp->num_columns(); ++i) { + if (has_mask[i] && !has_nulls[i]) { + tmp->get_column(i).set_null_mask( + cudf::create_null_mask(tmp->get_column(i).size(), cudf::mask_state::ALL_VALID), 0); + } + } + + return tmp; + } return std::move(tabs.front()); }(); // auto out_table = std::move(tabs.front()); From df8d9b3a7ccc67907070cea22d31312fee844a95 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 25 Feb 2024 14:32:43 -0800 Subject: [PATCH 103/321] Turn off debug printing Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 37 +++++++++++--------------- cpp/src/io/orc/reader_impl_chunking.cu | 13 +++++---- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 74939b4e628..304f35bd388 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -530,17 +530,19 @@ void decode_stream_data(std::size_t num_dicts, CUDF_EXPECTS(num_errors == 0, "ORC data decode failed"); std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) { - out_buffers[col_idx].null_count() = std::accumulate( - stripe_idx_it + 0, - stripe_idx_it + num_stripes, - 0, - [&](auto null_count, auto const stripe_idx) { - printf( - "null count: %d => %d\n", (int)stripe_idx, (int)chunks[stripe_idx][col_idx].null_count); - printf("num child rows: %d \n", (int)chunks[stripe_idx][col_idx].num_child_rows); - - return null_count + chunks[stripe_idx][col_idx].null_count; - }); + out_buffers[col_idx].null_count() = + std::accumulate(stripe_idx_it + 0, + stripe_idx_it + num_stripes, + 0, + [&](auto null_count, auto const stripe_idx) { + // printf( + // "null count: %d => %d\n", (int)stripe_idx, + // (int)chunks[stripe_idx][col_idx].null_count); + // printf("num child rows: %d \n", + // (int)chunks[stripe_idx][col_idx].num_child_rows); + + return null_count + chunks[stripe_idx][col_idx].null_count; + }); }); } @@ -786,6 +788,7 @@ void reader::impl::decompress_and_decode() // compared to parent column. 
auto& col_meta = *_col_meta; +#if 0 printf("num_child_rows: (size %d)\n", (int)_col_meta->num_child_rows.size()); if (_col_meta->num_child_rows.size()) { for (auto x : _col_meta->num_child_rows) { @@ -841,6 +844,8 @@ void reader::impl::decompress_and_decode() printf("\n"); } +#endif + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { printf("processing level = %d\n", (int)level); @@ -1015,16 +1020,6 @@ void reader::impl::decompress_and_decode() if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stripe_start].dst_pos; - if (chunk.strm_len[k]) { - auto& info = stream_info[chunk.strm_id[k] + stripe_start]; - printf("stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_col_idx, - (int)info.id.kind); - - printf("stream %d: %p\n", (int)k, chunk.streams[k]); - } } } } diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 92e4b859388..e9b4a337006 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -105,7 +105,7 @@ std::size_t gather_stream_info_and_column_desc( auto const schema_type = types[column_id]; if (!schema_type.subtypes.empty() && schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - printf("present stream\n"); + // printf("present stream\n"); for (auto const& idx : schema_type.subtypes) { auto const child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; if (child_idx >= 0) { @@ -138,12 +138,11 @@ std::size_t gather_stream_info_and_column_desc( } (*stream_idx)++; } else { // not chunks.has_value() - printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", - (int)stripe_index, - (int)level, - (int)column_id, - (int)stream.kind); - ; + // printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", + // (int)stripe_index, + // (int)level, + // (int)column_id, + // (int)stream.kind); stream_info.value()->emplace_back( stripeinfo->offset + src_offset, From 12dff3b790440ace7137ce116e191d39bd72761c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 26 Feb 2024 11:45:43 -0800 Subject: [PATCH 104/321] Some fixes --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 8 +++++--- cpp/src/io/orc/orc.hpp | 2 +- cpp/src/io/orc/reader_impl.cu | 13 +++++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 257c356d6b8..1e9cb50d532 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -198,8 +198,9 @@ aggregate_orc_metadata::select_stripes( nullptr, static_cast(src_file_idx)}); - // TODO: check for overflow here. 
- rows_to_read += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; + // TODO: change return type to int64_t + rows_to_read += static_cast( + per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows); printf(" rows_to_read : %d / %d\n", (int)per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows, (int)rows_to_read); @@ -220,7 +221,8 @@ aggregate_orc_metadata::select_stripes( for (size_t stripe_idx = 0; stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size() && count < rows_to_skip + rows_to_read; ++stripe_idx) { - count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; + count += + static_cast(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows); if (count > rows_to_skip || count == 0) { stripe_infos.push_back({&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr, diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 9759c3a0bf1..4a35aaf5107 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -85,7 +85,7 @@ struct StripeInformation { uint64_t indexLength = 0; // the length of the indexes in bytes uint64_t dataLength = 0; // the length of the data in bytes uint64_t footerLength = 0; // the length of the footer in bytes - int64_t numberOfRows = 0; // the number of rows in the stripe + uint64_t numberOfRows = 0; // the number of rows in the stripe }; struct SchemaType { diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index cbdaaed113f..c3f68de21c7 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -743,10 +743,15 @@ void reader::impl::decompress_and_decode() for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; - rows_to_read += stripe_info->numberOfRows; + // TODO: check overflow + // CUDF_EXPECTS(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows < + // static_cast(std::numeric_limits::max()), + // "TODO"); + rows_to_read += static_cast(stripe_info->numberOfRows); if (_file_itm_data.rows_to_skip > 0) { - CUDF_EXPECTS(_file_itm_data.rows_to_skip < stripe_info->numberOfRows, "TODO"); + CUDF_EXPECTS(_file_itm_data.rows_to_skip < static_cast(stripe_info->numberOfRows), + "TODO"); } } rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); @@ -955,7 +960,7 @@ void reader::impl::decompress_and_decode() // printf("line %d\n", __LINE__); // fflush(stdout); - auto const num_rows_per_stripe = stripe_info->numberOfRows; + auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows); printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); auto const rowgroup_id = num_rowgroups; @@ -977,7 +982,7 @@ void reader::impl::decompress_and_decode() ? stripe_start_row : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; chunk.num_rows = (level == 0) - ? stripe_info->numberOfRows + ? 
static_cast(stripe_info->numberOfRows) : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; printf("col idx: %d, start_row: %d, num rows: %d\n", (int)col_idx, From 54018268d732546b87fef950dc9cff561169f05b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 26 Feb 2024 14:19:48 -0800 Subject: [PATCH 105/321] Fix host memory issue --- cpp/src/io/orc/reader_impl.cu | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index c3f68de21c7..d8c0018dce8 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -667,9 +667,9 @@ void aggregate_child_meta(std::size_t stripe_start, // TODO: Check for overflow here. num_child_rows[child_col_idx] += child_rows; - num_child_rows_per_stripe[stripe_id + stripe_start][child_col_idx] = child_rows; + num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; // start row could be different for each column when there is nesting at each stripe level - child_start_row[stripe_id + stripe_start][child_col_idx] = (stripe_id == 0) ? 0 : start_row; + child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; printf("update child_start_row (%d, %d): %d\n", (int)stripe_id, (int)child_col_idx, @@ -978,12 +978,15 @@ void reader::impl::decompress_and_decode() auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting - chunk.start_row = (level == 0) - ? stripe_start_row - : col_meta.child_start_row[stripe_idx * num_columns + col_idx]; - chunk.num_rows = (level == 0) - ? static_cast(stripe_info->numberOfRows) - : col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.start_row = + (level == 0) + ? stripe_start_row + : col_meta.child_start_row[(stripe_idx - stripe_start) * num_columns + col_idx]; + chunk.num_rows = + (level == 0) + ? static_cast(stripe_info->numberOfRows) + : col_meta + .num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_columns + col_idx]; printf("col idx: %d, start_row: %d, num rows: %d\n", (int)col_idx, (int)chunk.start_row, From e53cf564c544702b4cd577055e4d9eda30aa3fe1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 26 Feb 2024 14:39:39 -0800 Subject: [PATCH 106/321] Some cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d8c0018dce8..4c550820ad8 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -629,10 +629,10 @@ void aggregate_child_meta(std::size_t stripe_start, printf("\n\n"); // For each parent column, update its child column meta for each stripe. 
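  // Worked example with hypothetical sizes: if two selected stripes report
  // chunks[s][parent].num_child_rows = {8, 6}, the loop below produces
  //   child_start_row[s][child]           = {0, 8}
  //   num_child_rows_per_stripe[s][child] = {8, 6}
  //   num_child_rows[child]               = 14
  // i.e. the start rows are a running prefix sum of the per-stripe child row counts.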
std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { - printf("p_col.id: %d\n", (int)p_col.id); + // printf("p_col.id: %d\n", (int)p_col.id); auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; - printf(" level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx); + // printf(" level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx); int64_t start_row = 0; auto processed_row_groups = 0; @@ -640,7 +640,7 @@ void aggregate_child_meta(std::size_t stripe_start, for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { // Aggregate num_rows and start_row from processed parent columns per row groups if (num_of_rowgroups) { - printf(" num_of_rowgroups: %d\n", (int)num_of_rowgroups); + // printf(" num_of_rowgroups: %d\n", (int)num_of_rowgroups); auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; auto processed_child_rows = 0; @@ -659,8 +659,8 @@ void aggregate_child_meta(std::size_t stripe_start, // Aggregate start row, number of rows per chunk and total number of rows in a column auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; - printf(" stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows); - printf(" p_col.num_children: %d\n", (int)p_col.num_children); + // printf(" stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows); + // printf(" p_col.num_children: %d\n", (int)p_col.num_children); for (size_type id = 0; id < p_col.num_children; id++) { auto const child_col_idx = index + id; @@ -670,13 +670,13 @@ void aggregate_child_meta(std::size_t stripe_start, num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; // start row could be different for each column when there is nesting at each stripe level child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; - printf("update child_start_row (%d, %d): %d\n", - (int)stripe_id, - (int)child_col_idx, - (int)start_row); + // printf("update child_start_row (%d, %d): %d\n", + // (int)stripe_id, + // (int)child_col_idx, + // (int)start_row); } start_row += child_rows; - printf(" start_row: %d\n", (int)start_row); + // printf(" start_row: %d\n", (int)start_row); } // Parent column null mask and null count would be required for child column @@ -1120,7 +1120,7 @@ void reader::impl::decompress_and_decode() auto is_list_type = (column_types[i].id() == type_id::LIST); auto n_rows = (level == 0) ? 
rows_to_read : col_meta.num_child_rows[i]; - printf(" create child col, num rows: %d\n", (int)n_rows); + // printf(" create child col, num rows: %d\n", (int)n_rows); // For list column, offset column will be always size + 1 if (is_list_type) n_rows++; From f2ec94ccfce30357fb00a6f8746052da4ca682fe Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 27 Feb 2024 11:37:39 -0800 Subject: [PATCH 107/321] Compute table row size Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 279 +++++++++++++++++++++++++ 1 file changed, 279 insertions(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2b208738b1e..08e3dbe24f0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -23,11 +23,18 @@ #include "io/orc/reader_impl_helpers.hpp" #include "io/utilities/config_utils.hpp" +#include +#include +#include #include #include #include #include +#include +#include +#include #include +#include #include #include @@ -37,17 +44,153 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include #include +// +// +// +#include + +#include +// +// +// +namespace cudf::experimental { + +enum class decompose_lists_column : bool { YES, NO }; + +auto decompose_structs(table_view table, + decompose_lists_column decompose_lists, + host_span column_order = {}, + host_span null_precedence = {}) +{ + auto linked_columns = detail::table_to_linked_columns(table); + + std::vector verticalized_columns; + std::vector new_column_order; + std::vector new_null_precedence; + std::vector verticalized_col_depths; + for (size_t col_idx = 0; col_idx < linked_columns.size(); ++col_idx) { + detail::linked_column_view const* col = linked_columns[col_idx].get(); + if (is_nested(col->type())) { + // convert and insert + std::vector> flattened; + std::function*, int)> + recursive_child = [&](detail::linked_column_view const* c, + std::vector* branch, + int depth) { + branch->push_back(c); + if (decompose_lists == decompose_lists_column::YES && c->type().id() == type_id::LIST) { + recursive_child( + c->children[lists_column_view::child_column_index].get(), branch, depth + 1); + } else if (c->type().id() == type_id::STRUCT) { + for (size_t child_idx = 0; child_idx < c->children.size(); ++child_idx) { + // When child_idx == 0, we also cut off the current branch if its first child is a + // lists column. + // In such cases, the last column of the current branch will be `Struct` and + // it will be modified to empty struct type `Struct<>` later on. + if (child_idx > 0 || c->children[0]->type().id() == type_id::LIST) { + verticalized_col_depths.push_back(depth + 1); + branch = &flattened.emplace_back(); + } + recursive_child(c->children[child_idx].get(), branch, depth + 1); + } + } + }; + auto& branch = flattened.emplace_back(); + verticalized_col_depths.push_back(0); + recursive_child(col, &branch, 0); + + for (auto const& branch : flattened) { + column_view temp_col = *branch.back(); + + // Change `Struct` into empty struct type `Struct<>`. 
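        // A sketch of the case handled here: STRUCT<LIST<INT32>> decomposes into two
        // vertical branches. The first ends at the struct itself, which keeps its
        // null mask and offset but must drop the LIST child, hence the rewrite to
        // the childless type STRUCT<> below; the second branch carries the
        // LIST<INT32> path and is later re-parented so the enclosing struct's
        // offset still applies.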
+ if (temp_col.type().id() == type_id::STRUCT && + (temp_col.num_children() > 0 && temp_col.child(0).type().id() == type_id::LIST)) { + temp_col = column_view(temp_col.type(), + temp_col.size(), + temp_col.head(), + temp_col.null_mask(), + temp_col.null_count(), + temp_col.offset(), + {}); + } + + for (auto it = branch.crbegin() + 1; it < branch.crend(); ++it) { + auto const& prev_col = *(*it); + auto children = + (prev_col.type().id() == type_id::LIST) + ? std::vector{*prev_col + .children[lists_column_view::offsets_column_index], + temp_col} + : std::vector{temp_col}; + temp_col = column_view(prev_col.type(), + prev_col.size(), + nullptr, + prev_col.null_mask(), + prev_col.null_count(), + prev_col.offset(), + std::move(children)); + } + // Traverse upward and include any list columns in the ancestors + for (detail::linked_column_view* parent = branch.front()->parent; parent; + parent = parent->parent) { + if (parent->type().id() == type_id::LIST) { + // Include this parent + temp_col = column_view( + parent->type(), + parent->size(), + nullptr, // list has no data of its own + nullptr, // If we're going through this then nullmask is already in another branch + 0, + parent->offset(), + {*parent->children[lists_column_view::offsets_column_index], temp_col}); + } else if (parent->type().id() == type_id::STRUCT) { + // Replace offset with parent's offset + temp_col = column_view(temp_col.type(), + parent->size(), + temp_col.head(), + temp_col.null_mask(), + temp_col.null_count(), + parent->offset(), + {temp_col.child_begin(), temp_col.child_end()}); + } + } + verticalized_columns.push_back(temp_col); + } + if (not column_order.empty()) { + new_column_order.insert(new_column_order.end(), flattened.size(), column_order[col_idx]); + } + if (not null_precedence.empty()) { + new_null_precedence.insert( + new_null_precedence.end(), flattened.size(), null_precedence[col_idx]); + } + } else { + verticalized_columns.push_back(*col); + verticalized_col_depths.push_back(0); + if (not column_order.empty()) { new_column_order.push_back(column_order[col_idx]); } + if (not null_precedence.empty()) { new_null_precedence.push_back(null_precedence[col_idx]); } + } + } + return std::make_tuple(table_view(verticalized_columns), + std::move(new_column_order), + std::move(new_null_precedence), + std::move(verticalized_col_depths)); +} +} // namespace cudf::experimental + namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( @@ -744,4 +887,140 @@ void reader::impl::load_data() _chunk_read_data.curr_decode_stripe_chunk = 0; } +namespace { + +// Default 10k rows. +size_type constexpr SEGMENT_SIZE = 10'000; + +/** + * @brief Functor which computes the total data size for a given type of a cudf column. + * + * In the case of strings, the return size does not include the chars themselves. That + * information is tracked separately (see PageInfo::str_bytes). + * + * TODO + */ +struct column_segment_size_functor { + column_device_view d_col; + size_type size; + + __device__ std::size_t num_rows(size_type start_row) const + { + return cuda::std::min(size, d_col.size() - start_row); + } + + __device__ std::size_t validity_size(size_type start_row) const + { + return d_col.nullable() + ? 
cudf::util::div_rounding_up_safe(num_rows(start_row), std::size_t{32}) * 4ul + : 0ul; + } + + template () && !cudf::is_nested() && + !std::is_same_v)> + __device__ std::size_t operator()(size_type) const + { + CUDF_UNREACHABLE("Attempted to find size of unsupported types."); + } + + template ())> + __device__ std::size_t operator()(size_type start_row) const + { + auto constexpr element_size = sizeof(device_storage_type_t); + return element_size * num_rows(start_row) + validity_size(start_row); + } + + template )> + __device__ std::size_t operator()(size_type start_row) const + { + auto const offsets = d_col.child(strings_column_view::offsets_column_index); + auto const offsetalator = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); + auto const char_begin = offsetalator[start_row]; + auto const char_end = offsetalator[start_row + num_rows(start_row)]; + auto const chars_size = char_end - char_begin; + + // NOTE: Adding the + 1 offset, similar to the case of lists column. + auto const offset_size = + offsets.type().id() == type_id::INT32 ? sizeof(int32_t) : sizeof(int64_t); + return offset_size * (num_rows(start_row) + 1) + validity_size(start_row) + chars_size; + } + + template ())> + __device__ std::size_t operator()(size_type start_row) const + { + auto constexpr element_size = sizeof(device_storage_type_t); + + auto col = d_col; + auto col_size = element_size + validity_size(start_row); + auto child_start_row = start_row; + auto child_size = size; + + while (col.type().id() == type_id::STRUCT || col.type().id() == type_id::LIST) { + if (col.type().id() == type_id::STRUCT) { + // Empty struct. + if (col.num_child_columns() == 0) { return col_size; } + col = col.child(0); + } else { + auto const offsets = col.child(lists_column_view::offsets_column_index); + col = col.child(lists_column_view::child_column_index); + + auto const child_end_row = offsets.element(start_row + num_rows(start_row)); + child_start_row = offsets.element(start_row); + child_size = child_end_row - child_start_row; + + // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra + // offset for the entire column so we will get a small over-estimate of the real size. 
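          // Concretely (hypothetical numbers): a lists column scanned in three
          // segments books three "+ 1" offsets where only one exists, so with
          // 4-byte offsets the total is over-estimated by 8 bytes, which is
          // negligible against the data itself.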
+ auto constexpr offset_size = sizeof(size_type); + col_size += offset_size * (num_rows(start_row) + 1); + } + } + + return col_size + type_dispatcher( + col.type(), column_segment_size_functor{col, child_size}, child_start_row); + } +}; + +struct table_segment_size_functor { + table_device_view d_table; + size_type size; + + __device__ std::size_t operator()(size_type start_row) const + { + auto const col_size = [=](column_device_view col) { + return cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); + }; + + return thrust::transform_reduce( + thrust::seq, d_table.begin(), d_table.end(), col_size, 0ul, thrust::plus<>{}); + } +}; + +} // namespace + +void test(table_view const& input, rmm::cuda_stream_view stream) +{ + auto verticalized_t = std::get<0>( + cudf::experimental::decompose_structs(input, cudf::experimental::decompose_lists_column::YES)); + auto d_t = table_device_view::create(verticalized_t, stream); + + auto const num_segments = input.num_rows() / SEGMENT_SIZE; + auto output = make_fixed_width_column( + data_type{type_id::UINT64}, num_segments, mask_state::UNALLOCATED, stream); + + auto s = thrust::transform( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + output->mutable_view().begin(), + cuda::proclaim_return_type( + [SEGMENT_SIZE = SEGMENT_SIZE, d_table = *d_t] __device__(auto const segment_idx) { + auto const start_row = segment_idx * SEGMENT_SIZE; + return table_segment_size_functor{d_table, SEGMENT_SIZE}(start_row); + })); + + printf("segment size: \n"); + cudf::test::print(output->view()); + fflush(stdout); +} + } // namespace cudf::io::orc::detail From fd325b6224a858117d7ca4e1fe9697cfe05d56e3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 27 Feb 2024 15:30:48 -0800 Subject: [PATCH 108/321] Compute column row size Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 28 +++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 08e3dbe24f0..6b8733827c1 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -892,6 +892,8 @@ namespace { // Default 10k rows. size_type constexpr SEGMENT_SIZE = 10'000; +// size_type constexpr SEGMENT_SIZE = 1; + /** * @brief Functor which computes the total data size for a given type of a cudf column. * @@ -943,20 +945,22 @@ struct column_segment_size_functor { // NOTE: Adding the + 1 offset, similar to the case of lists column. auto const offset_size = offsets.type().id() == type_id::INT32 ? sizeof(int32_t) : sizeof(int64_t); + // printf(" offset sizes: %d, char size: %d\n", (int)offset_size, (int)chars_size); + return offset_size * (num_rows(start_row) + 1) + validity_size(start_row) + chars_size; } template ())> __device__ std::size_t operator()(size_type start_row) const { - auto constexpr element_size = sizeof(device_storage_type_t); - auto col = d_col; - auto col_size = element_size + validity_size(start_row); + auto col_size = std::size_t{0}; auto child_start_row = start_row; auto child_size = size; while (col.type().id() == type_id::STRUCT || col.type().id() == type_id::LIST) { + col_size += validity_size(start_row); + if (col.type().id() == type_id::STRUCT) { // Empty struct. 
if (col.num_child_columns() == 0) { return col_size; } @@ -987,10 +991,19 @@ struct table_segment_size_functor { __device__ std::size_t operator()(size_type start_row) const { + // printf("line %d, start row %d\n", __LINE__, start_row); + auto const col_size = [=](column_device_view col) { return cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); }; + // for (auto col : d_table) { + // auto t = cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, + // start_row); printf("start: %d, col size: %d\n", start_row, (int)t); + // } + + // printf("line %d\n", __LINE__); + return thrust::transform_reduce( thrust::seq, d_table.begin(), d_table.end(), col_size, 0ul, thrust::plus<>{}); } @@ -1002,13 +1015,18 @@ void test(table_view const& input, rmm::cuda_stream_view stream) { auto verticalized_t = std::get<0>( cudf::experimental::decompose_structs(input, cudf::experimental::decompose_lists_column::YES)); + auto d_t = table_device_view::create(verticalized_t, stream); - auto const num_segments = input.num_rows() / SEGMENT_SIZE; - auto output = make_fixed_width_column( + auto const num_segments = std::max(input.num_rows() / SEGMENT_SIZE, 1); + printf("num rows: %d, num seeg: %d\n", input.num_rows(), num_segments); + fflush(stdout); + + auto output = make_fixed_width_column( data_type{type_id::UINT64}, num_segments, mask_state::UNALLOCATED, stream); auto s = thrust::transform( + rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_segments), output->mutable_view().begin(), From 416d810a357da3ed8da0259f84eab2903ebd6a4f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 27 Feb 2024 15:30:55 -0800 Subject: [PATCH 109/321] Test column size Signed-off-by: Nghia Truong --- cpp/tests/io/orc_test.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 24e2e2cfea0..d4e497d7ecd 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -39,6 +39,10 @@ #include +namespace cudf::io::orc::detail { +void test(table_view const& input, rmm::cuda_stream_view stream); +} + template using column_wrapper = typename std::conditional, @@ -774,6 +778,8 @@ TEST_F(OrcChunkedWriterTest, Metadata) auto result = cudf::io::read_orc(read_opts); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + + cudf::io::orc::detail::test(expected, cudf::get_default_stream()); } TEST_F(OrcChunkedWriterTest, Strings) @@ -1388,6 +1394,8 @@ TEST_P(OrcWriterTestStripes, StripeSize) auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); + + cudf::io::orc::detail::test(expected->view(), cudf::get_default_stream()); }; { @@ -1484,6 +1492,8 @@ TEST_F(OrcWriterTest, TestMap) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + + cudf::io::orc::detail::test(cudf::table_view{{*list_col}}, cudf::get_default_stream()); } TEST_F(OrcReaderTest, NestedColumnSelection) From b745787f841c208555467303667ee15c48e1248a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 11:43:08 -0800 Subject: [PATCH 110/321] Test column sizes using `segmented_bit_count` Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/transform.hpp | 12 +++- cpp/src/io/orc/reader_impl_chunking.cu | 55 +++++++++++++++-- cpp/src/transform/row_bit_count.cu | 82 ++++++++++++++++++-------- 3 files changed, 118 insertions(+), 31 deletions(-) 
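A minimal usage sketch of the API this patch introduces (signatures as declared in
the transform.hpp hunk below; `tbl`, `stream`, and `mr` are stand-ins):

  #include <cudf/detail/transform.hpp>

  // Total size, in bits, of each consecutive run of 10'000 rows; the last
  // segment is shorter when the row count is not a multiple of the length.
  auto seg_sizes = cudf::detail::segmented_bit_count(tbl->view(), 10'000, stream, mr);

  // row_bit_count() becomes the segment_length == 1 special case.
  auto row_sizes = cudf::detail::row_bit_count(tbl->view(), stream, mr);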
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 215ad50aed6..0ce7037b9e8 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,5 +100,15 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @copydoc cudf::segmented_bit_count + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr segmented_bit_count(table_view const& t, + size_type segment_length, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace cudf diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 6b8733827c1..5fdf834f6bc 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -25,8 +25,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -930,6 +932,13 @@ struct column_segment_size_functor { __device__ std::size_t operator()(size_type start_row) const { auto constexpr element_size = sizeof(device_storage_type_t); + + if (start_row == 0) { + printf(" col size: %d (valid: %d)\n", + (int)(element_size * num_rows(start_row) + validity_size(start_row)), + (int)validity_size(start_row)); + } + return element_size * num_rows(start_row) + validity_size(start_row); } @@ -961,10 +970,14 @@ struct column_segment_size_functor { while (col.type().id() == type_id::STRUCT || col.type().id() == type_id::LIST) { col_size += validity_size(start_row); + if (start_row == 0) { printf(" add valid size: %d\n", (int)validity_size(start_row)); } + if (col.type().id() == type_id::STRUCT) { // Empty struct. if (col.num_child_columns() == 0) { return col_size; } col = col.child(0); + + if (start_row == 0) { printf(" struct, move down\n"); } } else { auto const offsets = col.child(lists_column_view::offsets_column_index); col = col.child(lists_column_view::child_column_index); @@ -977,6 +990,10 @@ struct column_segment_size_functor { // offset for the entire column so we will get a small over-estimate of the real size. 
auto constexpr offset_size = sizeof(size_type); col_size += offset_size * (num_rows(start_row) + 1); + + if (start_row == 0) { + printf(" list, add offst size: %d\n", (int)(offset_size * (num_rows(start_row) + 1))); + } } } @@ -994,16 +1011,18 @@ struct table_segment_size_functor { // printf("line %d, start row %d\n", __LINE__, start_row); auto const col_size = [=](column_device_view col) { + if (start_row == 0) { printf("compute new col %d\n", __LINE__); } return cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); }; - // for (auto col : d_table) { - // auto t = cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, - // start_row); printf("start: %d, col size: %d\n", start_row, (int)t); + // if (start_row == 0) { + // for (auto col : d_table) { + // auto t = + // cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); + // printf("start: %d, col size: %d\n", start_row, (int)t); + // } // } - // printf("line %d\n", __LINE__); - return thrust::transform_reduce( thrust::seq, d_table.begin(), d_table.end(), col_size, 0ul, thrust::plus<>{}); } @@ -1016,6 +1035,20 @@ void test(table_view const& input, rmm::cuda_stream_view stream) auto verticalized_t = std::get<0>( cudf::experimental::decompose_structs(input, cudf::experimental::decompose_lists_column::YES)); + auto sliced_in = std::move(cudf::slice(input, {0, 5})[0]); + for (auto col : sliced_in) { + printf("=====sliced in col: \n"); + cudf::test::print(col); + } + fflush(stdout); + + auto sliced_in_v = std::move(cudf::slice(verticalized_t, {0, 5})[0]); + for (auto col : sliced_in_v) { + printf("=====sliced_in_v: \n"); + cudf::test::print(col); + } + fflush(stdout); + auto d_t = table_device_view::create(verticalized_t, stream); auto const num_segments = std::max(input.num_rows() / SEGMENT_SIZE, 1); @@ -1039,6 +1072,18 @@ void test(table_view const& input, rmm::cuda_stream_view stream) printf("segment size: \n"); cudf::test::print(output->view()); fflush(stdout); + + auto out = cudf::detail::segmented_bit_count( + input, SEGMENT_SIZE, stream, rmm::mr::get_current_device_resource()); + thrust::transform(rmm::exec_policy(stream), + out->view().begin(), + out->view().end(), + out->mutable_view().begin(), + cuda::proclaim_return_type([] __device__(auto const x) { return x / 8; })); + + printf("segment size again: \n"); + cudf::test::print(out->view()); + fflush(stdout); } } // namespace cudf::io::orc::detail diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index eda8ec7a463..8c0a805b00f 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,10 @@ #include #include -#include +#include +#include #include +#include namespace cudf { namespace detail { @@ -398,26 +401,29 @@ __device__ size_type row_size_functor::operator()(column_device_vie * @param cols An span of column_device_views representing a column hierarchy * @param info An span of column_info structs corresponding the elements in `cols` * @param output Output span of size (# rows) where per-row bit sizes are stored + * @param segment_length The number of rows in each segment for which the total size is computed * @param max_branch_depth Maximum depth of the span stack needed per-thread */ CUDF_KERNEL void compute_row_sizes(device_span cols, device_span info, device_span output, + size_type segment_length, size_type max_branch_depth) { extern 
__shared__ row_span thread_branch_stacks[];
   int const tid = threadIdx.x + blockIdx.x * blockDim.x;
-  auto const num_rows = output.size();
-  if (tid >= num_rows) { return; }
+  auto const num_segments = static_cast(output.size());
+  if (tid >= num_segments) { return; }
 
   // my_branch_stack points to the last span prior to branching. a branch occurs only
   // when we are inside of a list contained within a struct column.
   row_span* my_branch_stack = thread_branch_stacks + (threadIdx.x * max_branch_depth);
   size_type branch_depth{0};
 
-  // current row span - always starts at 1 row.
-  row_span cur_span{tid, tid + 1};
+  // current row span - always starts at spanning over `segment_length` rows.
+  auto const num_rows = cols[0].size();
+  row_span cur_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)};
 
   // output size
   size_type& size = output[tid];
 
@@ -444,7 +450,8 @@ CUDF_KERNEL void compute_row_sizes(device_span cols,
     if (info[idx].depth == 0) {
       branch_depth      = 0;
       last_branch_depth = 0;
-      cur_span          = row_span{tid, tid + 1};
+      cur_span          =
+        row_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)};
     }
 
     // add the contributing size of this row
@@ -465,14 +472,13 @@ CUDF_KERNEL void compute_row_sizes(device_span cols,
 
 }  // anonymous namespace
 
-/**
- * @copydoc cudf::detail::row_bit_count
- *
- */
-std::unique_ptr row_bit_count(table_view const& t,
-                              rmm::cuda_stream_view stream,
-                              rmm::mr::device_memory_resource* mr)
+std::unique_ptr segmented_bit_count(table_view const& t,
+                                    size_type segment_length,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
+  CUDF_EXPECTS(segment_length >= 1, "Invalid segment length.", std::invalid_argument);
+
   // no rows
   if (t.num_rows() <= 0) { return cudf::make_empty_column(type_id::INT32); }
 
@@ -484,17 +490,31 @@ std::unique_ptr row_bit_count(table_view const& t,
   CUDF_EXPECTS(info.size() == cols.size(), "Size/info mismatch");
 
   // create output buffer and view
-  auto output = cudf::make_fixed_width_column(
-    data_type{type_id::INT32}, t.num_rows(), mask_state::UNALLOCATED, stream, mr);
+  auto const num_segments = cudf::util::div_rounding_up_safe(t.num_rows(), segment_length);
+  auto output = cudf::make_fixed_width_column(
+    data_type{type_id::INT32}, num_segments, mask_state::UNALLOCATED, stream, mr);
   mutable_column_view mcv = output->mutable_view();
 
   // simple case. if we have no complex types (lists, strings, etc), the per-row size is already
   // trivially computed
   if (h_info.complex_type_count <= 0) {
-    thrust::fill(rmm::exec_policy(stream),
-                 mcv.begin(),
-                 mcv.end(),
-                 h_info.simple_per_row_size);
+    thrust::transform(
+      rmm::exec_policy(stream),
+      thrust::make_counting_iterator(0),
+      thrust::make_counting_iterator(num_segments),
+      mcv.begin(),
+      cuda::proclaim_return_type(
+        [segment_length,
+         num_segments,
+         num_rows     = t.num_rows(),
+         per_row_size = h_info.simple_per_row_size] __device__(size_type const segment_idx) {
+          // Since the number of rows may not be divisible by segment_length,
+          // the last segment may be shorter than the others.
+          auto const current_length = segment_idx + 1 < num_segments
                                        ? segment_length
                                        : num_rows - segment_length * segment_idx;
+          return per_row_size * current_length;
+        }));
     return output;
   }
 
@@ -523,22 +543,34 @@ std::unique_ptr row_bit_count(table_view const& t,
   // should we be aborting if we reach some extremely small block size, or just if we hit 0?
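  // Worked example of the launch geometry (hypothetical sizes): a 25'000-row table
  // with segment_length = 10'000 yields num_segments = 3, and the kernel clamps the
  // last thread's span to rows [20'000, 25'000). Per-thread shared memory stays at
  // max_branch_depth * sizeof(row_span), unchanged from the per-row version, so the
  // block-size check below is unaffected by the segment length.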
CUDF_EXPECTS(block_size > 0, "Encountered a column hierarchy too complex for row_bit_count"); - cudf::detail::grid_1d grid{t.num_rows(), block_size, 1}; + cudf::detail::grid_1d grid{num_segments, block_size, 1}; compute_row_sizes<<>>( {std::get<1>(d_cols), cols.size()}, {d_info.data(), info.size()}, - {mcv.data(), static_cast(t.num_rows())}, + {mcv.data(), static_cast(mcv.size())}, + segment_length, h_info.max_branch_depth); return output; } +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return segmented_bit_count(t, 1, stream, mr); +} + } // namespace detail -/** - * @copydoc cudf::row_bit_count - * - */ +std::unique_ptr segmented_bit_count(table_view const& t, + size_type segment_length, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_bit_count(t, segment_length, cudf::get_default_stream(), mr); +} + std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); From d0ed05a62a1951fdabf748cabeb9732fce516ebc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 14:57:17 -0800 Subject: [PATCH 111/321] Compute table sizes using `segmented_bit_count` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 83 ++++++++- cpp/src/io/orc/reader_impl.hpp | 12 +- cpp/src/io/orc/reader_impl_chunking.cu | 221 +----------------------- cpp/src/io/orc/reader_impl_chunking.hpp | 26 +++ cpp/tests/io/orc_test.cpp | 10 -- 5 files changed, 111 insertions(+), 241 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4c550820ad8..05f881fab71 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -23,7 +23,6 @@ // // // - #include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/reader_impl.hpp" @@ -33,6 +32,7 @@ #include #include +#include #include #include #include @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -720,6 +721,46 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } } +/** + * @brief TODO + * @param input + * @param size_limit + * @param stream + * @return + */ +std::vector find_table_splits(table_view const& input, + std::size_t size_limit, + rmm::cuda_stream_view stream) +{ + // Default 10k rows. + size_type constexpr SEGMENT_SIZE = 10'000; + auto const d_segmented_sizes = cudf::detail::segmented_bit_count( + input, SEGMENT_SIZE, stream, rmm::mr::get_current_device_resource()); + auto const d_size_begin = d_segmented_sizes->view().begin(); + + auto segmented_sizes = + cudf::detail::hostdevice_vector(d_segmented_sizes->size(), stream); + + // TODO: exec_policy_nosync + thrust::transform(rmm::exec_policy(stream), + d_size_begin, + d_size_begin + d_segmented_sizes->size(), + segmented_sizes.d_begin(), + [SEGMENT_SIZE] __device__(auto const size) { + return cumulative_size{SEGMENT_SIZE, static_cast(size)}; + }); + // TODO: exec_policy_nosync + thrust::inclusive_scan(rmm::exec_policy(stream), + segmented_sizes.d_begin(), + segmented_sizes.d_end(), + segmented_sizes.d_begin(), + cumulative_size_sum{}); + segmented_sizes.device_to_host_sync(stream); + + // Since the segment sizes are in bits, we need to multiply CHAR_BIT with the output limit. + return find_splits(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT); +} + } // namespace // TODO: this should be called per chunk of stripes. 
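To make the splitting math above concrete (hypothetical sizes): a 30'000-row decoded
table whose three 10'000-row segments measure 8, 8 and 4 MB scans to cumulative sizes
{8, 16, 20} MB, and a 10 MB output limit yields the row ranges [0, 10'000),
[10'000, 20'000) and [20'000, 30'000). Note that the limit is scaled by CHAR_BIT to
match the bit-granular segment sizes, rather than the sizes being divided by eight.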
@@ -1176,6 +1217,34 @@ void reader::impl::decompress_and_decode()
     // printf("line %d\n", __LINE__);
     // fflush(stdout);
   }  // end loop level
+
+  std::vector<std::unique_ptr<column>> out_columns;
+  _out_metadata = get_meta_with_user_data();
+  std::transform(
+    _selected_columns.levels[0].begin(),
+    _selected_columns.levels[0].end(),
+    std::back_inserter(out_columns),
+    [&](auto const& orc_col_meta) {
+      _out_metadata.schema_info.emplace_back("");
+      auto col_buffer = assemble_buffer(
+        orc_col_meta.id, 0, *_col_meta, _metadata, _selected_columns, _out_buffers, _stream, _mr);
+      return make_column(col_buffer, &_out_metadata.schema_info.back(), std::nullopt, _stream);
+    });
+  _decoded_table = std::make_unique<table>(std::move(out_columns));
+
+  // DEBUG only
+  _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3;
+
+  _chunk_read_data.output_table_chunks =
+    find_table_splits(_decoded_table->view(), _chunk_read_data.output_size_limit, _stream);
+  _chunk_read_data.curr_output_table_chunk = 0;
+
+  auto& splits = _chunk_read_data.output_table_chunks;
+  printf("------------\nSplits (/total num rows = %d): \n", (int)_decoded_table->num_rows());
+  for (size_t idx = 0; idx < splits.size(); idx++) {
+    printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count);
+  }
+  fflush(stdout);
 }
 
 void reader::impl::prepare_data(int64_t skip_rows,
@@ -1217,7 +1286,7 @@ table_with_metadata reader::impl::make_output_chunk()
   if (_selected_columns.num_levels() == 0) { return {std::make_unique<table>(), table_metadata{}}; }
 
   std::vector<std::unique_ptr<column>> out_columns;
-  auto out_metadata = make_output_metadata();
+  auto out_metadata = get_meta_with_user_data();
 
   // If no rows or stripes to read, return empty columns
   if (_file_itm_data.has_no_data() /*|| !_chunk_read_data.has_next()*/) {
@@ -1248,7 +1317,7 @@ table_with_metadata reader::impl::make_output_chunk()
 
   for (auto& buffers : _file_itm_data.out_buffers) {
     //
     out_columns.clear();  // TODO: remove
-    out_metadata = make_output_metadata();
+    out_metadata = get_meta_with_user_data();
 
     std::transform(_selected_columns.levels[0].begin(),
                    _selected_columns.levels[0].end(),
@@ -1334,9 +1403,9 @@ table_with_metadata reader::impl::make_output_chunk()
   return {std::move(out_table), std::move(out_metadata)};
 }
 
-table_metadata reader::impl::make_output_metadata()
+table_metadata reader::impl::get_meta_with_user_data()
 {
-  if (_out_metadata) { return table_metadata{*_out_metadata}; }
+  if (_meta_with_user_data) { return table_metadata{*_meta_with_user_data}; }
 
   // Copy user data to the output metadata.
   table_metadata out_metadata;
@@ -1357,8 +1426,8 @@ table_metadata reader::impl::make_output_metadata()
   out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(),
                             out_metadata.per_file_user_data[0].end()};
 
-  // Save the output table metadata into `_out_metadata` for reuse next time.
-  _out_metadata = std::make_unique<table_metadata>(out_metadata);
+  // Save the output table metadata into `_meta_with_user_data` for reuse next time.
+  _meta_with_user_data = std::make_unique<table_metadata>(out_metadata);
 
   return out_metadata;
 }
 
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index b95e9e244a0..b94f639c05d 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -141,7 +141,7 @@ class reader::impl {
   void decompress_and_decode();
 
   /**
-   * @brief Create the output table from the internal buffers and return it along with metadata.
+   * @brief Create the output table from the intermediate table and return it along with metadata.
    *
    * This function is called internally and expects all preprocessing steps have already been done.
    *
   * @return The output table along with columns' metadata
   */
  table_with_metadata make_output_chunk();

  /**
-   * @brief Create the output table metadata from file metadata.
+   * @brief Create the output table metadata storing user data in source metadata.
* - * @return Columns' metadata to output with the table read from file + * @return Columns' user data to output with the table read from file */ - table_metadata make_output_metadata(); + table_metadata get_meta_with_user_data(); rmm::cuda_stream_view const _stream; rmm::mr::device_memory_resource* const _mr; @@ -174,8 +174,10 @@ class reader::impl { column_hierarchy const _selected_columns; // Construct from `_metadata` thus declare after it file_intermediate_data _file_itm_data; chunk_read_data _chunk_read_data; - std::unique_ptr _out_metadata; + std::unique_ptr _meta_with_user_data; + table_metadata _out_metadata; std::vector> _out_buffers; + std::unique_ptr _decoded_table; }; } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5fdf834f6bc..af959b78af8 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -28,13 +28,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -304,26 +302,6 @@ std::size_t gather_stream_info_and_column_desc( return dst_offset; } -namespace { - -/** - * @brief Struct to accummulate sizes of chunks of some data such as stripe or rows. - */ -struct cumulative_size { - int64_t count{0}; - std::size_t size_bytes{0}; -}; - -/** - * @brief Functor to sum up cumulative sizes. - */ -struct cumulative_size_sum { - __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const - { - return cumulative_size{a.count + b.count, a.size_bytes + b.size_bytes}; - } -}; - #if 1 /** * @brief Find the splits of the input data such that each split has cumulative size less than a @@ -388,6 +366,8 @@ std::vector find_splits(host_span sizes, } #endif +namespace { + #ifdef PRINT_DEBUG /** * @brief Verify the splits, checking if they are correct. @@ -889,201 +869,4 @@ void reader::impl::load_data() _chunk_read_data.curr_decode_stripe_chunk = 0; } -namespace { - -// Default 10k rows. -size_type constexpr SEGMENT_SIZE = 10'000; - -// size_type constexpr SEGMENT_SIZE = 1; - -/** - * @brief Functor which computes the total data size for a given type of a cudf column. - * - * In the case of strings, the return size does not include the chars themselves. That - * information is tracked separately (see PageInfo::str_bytes). - * - * TODO - */ -struct column_segment_size_functor { - column_device_view d_col; - size_type size; - - __device__ std::size_t num_rows(size_type start_row) const - { - return cuda::std::min(size, d_col.size() - start_row); - } - - __device__ std::size_t validity_size(size_type start_row) const - { - return d_col.nullable() - ? 
cudf::util::div_rounding_up_safe(num_rows(start_row), std::size_t{32}) * 4ul - : 0ul; - } - - template () && !cudf::is_nested() && - !std::is_same_v)> - __device__ std::size_t operator()(size_type) const - { - CUDF_UNREACHABLE("Attempted to find size of unsupported types."); - } - - template ())> - __device__ std::size_t operator()(size_type start_row) const - { - auto constexpr element_size = sizeof(device_storage_type_t); - - if (start_row == 0) { - printf(" col size: %d (valid: %d)\n", - (int)(element_size * num_rows(start_row) + validity_size(start_row)), - (int)validity_size(start_row)); - } - - return element_size * num_rows(start_row) + validity_size(start_row); - } - - template )> - __device__ std::size_t operator()(size_type start_row) const - { - auto const offsets = d_col.child(strings_column_view::offsets_column_index); - auto const offsetalator = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); - auto const char_begin = offsetalator[start_row]; - auto const char_end = offsetalator[start_row + num_rows(start_row)]; - auto const chars_size = char_end - char_begin; - - // NOTE: Adding the + 1 offset, similar to the case of lists column. - auto const offset_size = - offsets.type().id() == type_id::INT32 ? sizeof(int32_t) : sizeof(int64_t); - // printf(" offset sizes: %d, char size: %d\n", (int)offset_size, (int)chars_size); - - return offset_size * (num_rows(start_row) + 1) + validity_size(start_row) + chars_size; - } - - template ())> - __device__ std::size_t operator()(size_type start_row) const - { - auto col = d_col; - auto col_size = std::size_t{0}; - auto child_start_row = start_row; - auto child_size = size; - - while (col.type().id() == type_id::STRUCT || col.type().id() == type_id::LIST) { - col_size += validity_size(start_row); - - if (start_row == 0) { printf(" add valid size: %d\n", (int)validity_size(start_row)); } - - if (col.type().id() == type_id::STRUCT) { - // Empty struct. - if (col.num_child_columns() == 0) { return col_size; } - col = col.child(0); - - if (start_row == 0) { printf(" struct, move down\n"); } - } else { - auto const offsets = col.child(lists_column_view::offsets_column_index); - col = col.child(lists_column_view::child_column_index); - - auto const child_end_row = offsets.element(start_row + num_rows(start_row)); - child_start_row = offsets.element(start_row); - child_size = child_end_row - child_start_row; - - // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra - // offset for the entire column so we will get a small over-estimate of the real size. 
- auto constexpr offset_size = sizeof(size_type); - col_size += offset_size * (num_rows(start_row) + 1); - - if (start_row == 0) { - printf(" list, add offst size: %d\n", (int)(offset_size * (num_rows(start_row) + 1))); - } - } - } - - return col_size + type_dispatcher( - col.type(), column_segment_size_functor{col, child_size}, child_start_row); - } -}; - -struct table_segment_size_functor { - table_device_view d_table; - size_type size; - - __device__ std::size_t operator()(size_type start_row) const - { - // printf("line %d, start row %d\n", __LINE__, start_row); - - auto const col_size = [=](column_device_view col) { - if (start_row == 0) { printf("compute new col %d\n", __LINE__); } - return cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); - }; - - // if (start_row == 0) { - // for (auto col : d_table) { - // auto t = - // cudf::type_dispatcher(col.type(), column_segment_size_functor{col, size}, start_row); - // printf("start: %d, col size: %d\n", start_row, (int)t); - // } - // } - - return thrust::transform_reduce( - thrust::seq, d_table.begin(), d_table.end(), col_size, 0ul, thrust::plus<>{}); - } -}; - -} // namespace - -void test(table_view const& input, rmm::cuda_stream_view stream) -{ - auto verticalized_t = std::get<0>( - cudf::experimental::decompose_structs(input, cudf::experimental::decompose_lists_column::YES)); - - auto sliced_in = std::move(cudf::slice(input, {0, 5})[0]); - for (auto col : sliced_in) { - printf("=====sliced in col: \n"); - cudf::test::print(col); - } - fflush(stdout); - - auto sliced_in_v = std::move(cudf::slice(verticalized_t, {0, 5})[0]); - for (auto col : sliced_in_v) { - printf("=====sliced_in_v: \n"); - cudf::test::print(col); - } - fflush(stdout); - - auto d_t = table_device_view::create(verticalized_t, stream); - - auto const num_segments = std::max(input.num_rows() / SEGMENT_SIZE, 1); - printf("num rows: %d, num seeg: %d\n", input.num_rows(), num_segments); - fflush(stdout); - - auto output = make_fixed_width_column( - data_type{type_id::UINT64}, num_segments, mask_state::UNALLOCATED, stream); - - auto s = thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_segments), - output->mutable_view().begin(), - cuda::proclaim_return_type( - [SEGMENT_SIZE = SEGMENT_SIZE, d_table = *d_t] __device__(auto const segment_idx) { - auto const start_row = segment_idx * SEGMENT_SIZE; - return table_segment_size_functor{d_table, SEGMENT_SIZE}(start_row); - })); - - printf("segment size: \n"); - cudf::test::print(output->view()); - fflush(stdout); - - auto out = cudf::detail::segmented_bit_count( - input, SEGMENT_SIZE, stream, rmm::mr::get_current_device_resource()); - thrust::transform(rmm::exec_policy(stream), - out->view().begin(), - out->view().end(), - out->mutable_view().begin(), - cuda::proclaim_return_type([] __device__(auto const x) { return x / 8; })); - - printf("segment size again: \n"); - cudf::test::print(out->view()); - fflush(stdout); -} - } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1542182ed7f..ba1de2e7525 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -240,6 +240,32 @@ struct chunk_read_data { } }; +/** + * @brief Struct to accumulate sizes of chunks of some data such as stripe or rows. 
+ */ +struct cumulative_size { + int64_t count{0}; + std::size_t size_bytes{0}; +}; + +/** + * @brief Functor to sum up cumulative sizes. + */ +struct cumulative_size_sum { + __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const + { + return cumulative_size{a.count + b.count, a.size_bytes + b.size_bytes}; + } +}; + +/** + * @brief Find the splits of the input data such that each split has cumulative size less than a + * given `size_limit`. + */ +std::vector find_splits(host_span sizes, + int64_t total_count, + size_t size_limit); + /** * @brief Function that populates descriptors for either individual streams or chunks of column * data, but not both. diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index d4e497d7ecd..24e2e2cfea0 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -39,10 +39,6 @@ #include -namespace cudf::io::orc::detail { -void test(table_view const& input, rmm::cuda_stream_view stream); -} - template using column_wrapper = typename std::conditional, @@ -778,8 +774,6 @@ TEST_F(OrcChunkedWriterTest, Metadata) auto result = cudf::io::read_orc(read_opts); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - - cudf::io::orc::detail::test(expected, cudf::get_default_stream()); } TEST_F(OrcChunkedWriterTest, Strings) @@ -1394,8 +1388,6 @@ TEST_P(OrcWriterTestStripes, StripeSize) auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - - cudf::io::orc::detail::test(expected->view(), cudf::get_default_stream()); }; { @@ -1492,8 +1484,6 @@ TEST_F(OrcWriterTest, TestMap) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - - cudf::io::orc::detail::test(cudf::table_view{{*list_col}}, cudf::get_default_stream()); } TEST_F(OrcReaderTest, NestedColumnSelection) From ae06017094b36b8a4d6bc23da34ea830c3970190 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 16:05:52 -0800 Subject: [PATCH 112/321] Temporary store multiple decoded tables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 49 ++++--------------------- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- 2 files changed, 9 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 05f881fab71..ae46b3c6d48 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -820,6 +820,7 @@ void reader::impl::decompress_and_decode() // TODO: move this to global step lvl_chunks.resize(_selected_columns.num_levels()); + _out_buffers.clear(); _out_buffers.resize(_selected_columns.num_levels()); // @@ -1268,7 +1269,7 @@ void reader::impl::prepare_data(int64_t skip_rows, while (_chunk_read_data.more_stripe_to_decode()) { decompress_and_decode(); - _file_itm_data.out_buffers.push_back(std::move(_out_buffers)); + _file_itm_data.out_tables.push_back(std::move(_decoded_table)); } } printf("done load and decode data\n\n"); @@ -1285,12 +1286,11 @@ table_with_metadata reader::impl::make_output_chunk() // There is no columns in the table. if (_selected_columns.num_levels() == 0) { return {std::make_unique
(), table_metadata{}}; } - std::vector> out_columns; - auto out_metadata = get_meta_with_user_data(); - // If no rows or stripes to read, return empty columns if (_file_itm_data.has_no_data() /*|| !_chunk_read_data.has_next()*/) { printf("has no next\n"); + std::vector> out_columns; + auto out_metadata = get_meta_with_user_data(); std::transform(_selected_columns.levels[0].begin(), _selected_columns.levels[0].end(), std::back_inserter(out_columns), @@ -1307,43 +1307,10 @@ table_with_metadata reader::impl::make_output_chunk() return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } - // TODO: move this into decompress_and_decode - // Create columns from buffer with respective schema information. - - // TODO: remove - std::vector> tabs; std::vector tv; - for (auto& buffers : _file_itm_data.out_buffers) { - // - out_columns.clear(); // TODO: remove - out_metadata = get_meta_with_user_data(); - - std::transform(_selected_columns.levels[0].begin(), - _selected_columns.levels[0].end(), - std::back_inserter(out_columns), - [&](auto const& orc_col_meta) { - out_metadata.schema_info.emplace_back(""); - auto col_buffer = assemble_buffer(orc_col_meta.id, - 0, - *_col_meta, - _metadata, - _selected_columns, - buffers, /*_out_buffers*/ - _stream, - _mr); - return make_column( - col_buffer, &out_metadata.schema_info.back(), std::nullopt, _stream); - }); - - // printf("output col0: \n"); - // cudf::test::print(out_columns.front()->view()); - // printf("output col1: \n"); - // cudf::test::print(out_columns.back()->view()); - - auto tbl = std::make_unique
(std::move(out_columns)); - tabs.push_back(std::move(tbl)); - tv.push_back(tabs.back()->view()); + for (auto& table : _file_itm_data.out_tables) { + tv.push_back(table->view()); // printf(" ----- decode one chunk, size = %d\n", tv.back().num_rows()); @@ -1379,7 +1346,7 @@ table_with_metadata reader::impl::make_output_chunk() return tmp; } - return std::move(tabs.front()); + return std::move(_file_itm_data.out_tables.front()); }(); // auto out_table = std::move(tabs.front()); @@ -1400,7 +1367,7 @@ table_with_metadata reader::impl::make_output_chunk() }(); #endif - return {std::move(out_table), std::move(out_metadata)}; + return {std::move(out_table), _out_metadata}; } table_metadata reader::impl::get_meta_with_user_data() diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index ba1de2e7525..d0996bcdde3 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -123,7 +123,7 @@ struct range { */ struct file_intermediate_data { // TODO: remove - std::vector>> out_buffers; + std::vector> out_tables; int64_t rows_to_skip; size_type rows_to_read; From 6cccca3f7e9a876876e53c211688e8741552cdcd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 19:25:47 -0800 Subject: [PATCH 113/321] Add test file Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/io/orc_chunked_reader_test.cpp | 1013 ++++++++++++++++++++++ 2 files changed, 1014 insertions(+), 1 deletion(-) create mode 100644 cpp/tests/io/orc_chunked_reader_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3e377b07eee..1bf11603bc0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -286,7 +286,7 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - ORC_TEST io/orc_test.cpp + ORC_TEST io/orc_chunked_reader_test.cpp io/orc_test.cpp GPUS 1 PERCENT 30 ) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp new file mode 100644 index 00000000000..eecadcc1e05 --- /dev/null +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace { +// Global environment for temporary files +auto const temp_env = reinterpret_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; +using int32s_lists_col = cudf::test::lists_column_wrapper; + +auto write_file(std::vector>& input_columns, + std::string const& filename, + bool nullable, + std::size_t stripe_size_bytes = cudf::io::default_stripe_size_bytes, + cudf::size_type stripe_size_rows = cudf::io::default_stripe_size_rows) +{ + // Just shift nulls of the next column by one position to avoid having all nulls in the same + // table rows. + if (nullable) { + // Generate deterministic bitmask instead of random bitmask for easy computation of data size. + auto const valid_iter = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type i) { return i % 4 != 3; }); + + cudf::size_type offset{0}; + for (auto& col : input_columns) { + auto const [null_mask, null_count] = + cudf::test::detail::make_null_mask(valid_iter + offset, valid_iter + col->size() + offset); + col = cudf::structs::detail::superimpose_nulls( + static_cast(null_mask.data()), + null_count, + std::move(col), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + } + } + + auto input_table = std::make_unique(std::move(input_columns)); + auto filepath = + temp_env->get_temp_filepath(nullable ? filename + "_nullable.orc" : filename + ".orc"); + + auto const write_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, *input_table) + .stripe_size_bytes(stripe_size_bytes) + .stripe_size_rows(stripe_size_rows) + .build(); + cudf::io::write_orc(write_opts); + + return std::pair{std::move(input_table), std::move(filepath)}; +} + +auto chunked_read(std::string const& filepath, std::size_t output_limit) +{ + auto const read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_orc_reader(output_limit, read_opts); + + auto num_chunks = 0; + auto out_tables = std::vector>{}; + + do { + auto chunk = reader.read_chunk(); + // If the input file is empty, the first call to `read_chunk` will return an empty table. + // Thus, we only check for non-empty output table from the second call. 
+ if (num_chunks > 0) { + CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); + } + ++num_chunks; + out_tables.emplace_back(std::move(chunk.tbl)); + } while (reader.has_next()); + + auto out_tviews = std::vector{}; + for (auto const& tbl : out_tables) { + out_tviews.emplace_back(tbl->view()); + } + + return std::pair(cudf::concatenate(out_tviews), num_chunks); +} + +} // namespace + +struct OrcChunkedReaderTest : public cudf::test::BaseFixture {}; + +TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData) +{ + std::vector> input_columns; + input_columns.emplace_back(int32s_col{}.release()); + input_columns.emplace_back(int64s_col{}.release()); + + auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty", false); + auto const [result, num_chunks] = chunked_read(filepath, 1'000); + EXPECT_EQ(num_chunks, 1); + EXPECT_EQ(result->num_rows(), 0); + EXPECT_EQ(result->num_columns(), 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); +} + +#if 0 +TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) +{ + auto constexpr num_rows = 40'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + input_columns.emplace_back(int64s_col(value_iter, value_iter + num_rows).release()); + + return write_file(input_columns, "chunked_read_simple", nullable); + }; + + { + auto const [expected, filepath] = generate_input(false); + auto const [result, num_chunks] = chunked_read(filepath, 240'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + { + auto const [expected, filepath] = generate_input(true); + auto const [result, num_chunks] = chunked_read(filepath, 240'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } +} + + +TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) +{ + // Tests some specific boundary conditions in the split calculations. 
+ + auto constexpr num_rows = 40'000; + + auto const [expected, filepath] = [num_rows]() { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + return write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); + }(); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath, 1); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit slightly less than one page of data + { + auto const [result, num_chunks] = chunked_read(filepath, 79'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit exactly the size one page of data + { + auto const [result, num_chunks] = chunked_read(filepath, 80'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit slightly more the size one page of data + { + auto const [result, num_chunks] = chunked_read(filepath, 81'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit slightly less than two pages of data + { + auto const [result, num_chunks] = chunked_read(filepath, 159'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit exactly the size of two pages of data minus one byte + { + auto const [result, num_chunks] = chunked_read(filepath, 159'999); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit exactly the size of two pages of data + { + auto const [result, num_chunks] = chunked_read(filepath, 160'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit slightly more the size two pages of data + { + auto const [result, num_chunks] = chunked_read(filepath, 161'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) +{ + auto constexpr num_rows = 60'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + + // ints Page total bytes cumulative bytes + // 20000 rows of 4 bytes each = A0 80000 80000 + // 20000 rows of 4 bytes each = A1 80000 160000 + // 20000 rows of 4 bytes each = A2 80000 240000 + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + + // strings Page total bytes cumulative bytes + // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 + // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 + // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 + auto const strings = std::vector{"a", "bbbb", "cccccccccccccccc"}; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { + if (i < 20000) { return strings[0]; } + if (i < 40000) { return strings[1]; } 
+ return strings[2]; + }); + input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release()); + + // Cumulative sizes: + // A0 + B0 : 180004 + // A1 + B1 : 420008 + // A2 + B2 : 900012 + // skip_rows / num_rows + // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} + // byte_limit==1000000 should give 1 chunks: {0, 60000}, + return write_file(input_columns, + "chunked_read_with_strings", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }; + + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Other tests: + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStringPrecise) +{ + auto constexpr num_rows = 60'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + + // strings Page total bytes cumulative + // 20000 rows alternating 1-4 chars each (50000 + 80004) A0 130004 130004 + // 20000 rows alternating 1-4 chars each (50000 + 80004) A1 130004 260008 + // ... 
+ auto const strings = std::vector{"a", "bbbb"}; + auto const str_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return strings[i % 2]; }); + input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release()); + + // Cumulative sizes: + // A0 : 130004 + // A1 : 260008 + // A2 : 390012 + return write_file(input_columns, + "chunked_read_with_strings_precise", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }; + + auto const [expected_no_null, filepath_no_null] = generate_input(false); + + // a chunk limit of 1 byte less than 2 pages should force it to produce 3 chunks: + // each 1 page in size + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 260'007); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + // a chunk limit of exactly equal to 2 pages should force it to produce 2 chunks + // pages 0-1 and page 2 + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 260'008); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) +{ + auto constexpr num_rows = 100'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + input_columns.emplace_back([=] { + auto child1 = int32s_col(int_iter, int_iter + num_rows); + auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2); + + auto const str_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) { return std::to_string(i); }); + auto child3 = strings_col{str_iter, str_iter + num_rows}; + + return structs_col{{child1, child2, child3}}.release(); + }()); + + return write_file(input_columns, + "chunked_read_with_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }; + + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Other tests: + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); + EXPECT_EQ(num_chunks, 5); + 
CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) +{ + auto constexpr num_rows = 100'000; + + auto const [expected, filepath] = [num_rows]() { + std::vector> input_columns; + // 20000 rows in 1 page consist of: + // + // 20001 offsets : 80004 bytes + // 30000 ints : 120000 bytes + // total : 200004 bytes + auto const template_lists = int32s_lists_col{ + int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{1, 2}, int32s_lists_col{3, 4, 5}}; + + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + input_columns.emplace_back( + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); + + return write_file(input_columns, + "chunked_read_with_lists_no_null", + false /*nullable*/, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }(); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + { + auto const [result, num_chunks] = chunked_read(filepath, 200'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size exactly 1 page + { + auto const [result, num_chunks] = chunked_read(filepath, 200'004); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) + { + auto const [result, num_chunks] = chunked_read(filepath, 400'008); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size 2 pages minus one byte: each chunk will be just one page + { + auto const [result, num_chunks] = chunked_read(filepath, 400'007); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) +{ + auto constexpr num_rows = 100'000; + + auto const [expected, filepath] = [num_rows]() { + std::vector> input_columns; + // 20000 rows in 1 page consist of: + // + // 625 validity words : 2500 bytes (a null every 4 rows: null at indices [3, 7, 11, ...]) + // 20001 offsets : 80004 bytes + // 15000 ints : 60000 bytes + // total : 142504 bytes + auto const template_lists = + int32s_lists_col{// these will all be null + int32s_lists_col{}, + int32s_lists_col{0}, + int32s_lists_col{1, 2}, + int32s_lists_col{3, 4, 5, 6, 7, 8, 9} /* this list will be nullified out */}; + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + input_columns.emplace_back( + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); + + return write_file(input_columns, + "chunked_read_with_lists_nulls", + true /*nullable*/, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }(); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + { + auto const [result, num_chunks] = chunked_read(filepath, 142'500); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size exactly 1 page + { + auto const [result, num_chunks] = chunked_read(filepath, 142'504); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) + { + auto const [result, num_chunks] = chunked_read(filepath, 285'008); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // chunk size 2 pages minus 1 byte: each chunk will be just one page + { + auto const [result, num_chunks] = chunked_read(filepath, 285'007); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) +{ + auto constexpr num_rows = 100'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + input_columns.emplace_back([=] { + std::vector> child_columns; + child_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + child_columns.emplace_back( + int32s_col(int_iter + num_rows, int_iter + num_rows * 2).release()); + + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { + return std::to_string(i) + "++++++++++++++++++++" + std::to_string(i); + }); + child_columns.emplace_back(strings_col{str_iter, str_iter + num_rows}.release()); + + auto const template_lists = int32s_lists_col{ + int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}}; + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + child_columns.emplace_back( + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); + + return structs_col(std::move(child_columns)).release(); + }()); + + return write_file(input_columns, + "chunked_read_with_structs_of_lists", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }; + + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Other tests: + + // for these tests, different columns get written to different numbers of pages so it's a + // little tricky to describe the expected results by page counts. 
To get an idea of how + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // reader_impl_preprocess.cu -> find_splits() + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + EXPECT_EQ(num_chunks, 7); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) +{ + auto constexpr num_rows = 100'000; + + auto const generate_input = [num_rows](bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + + auto offsets = std::vector{}; + offsets.reserve(num_rows * 2); + cudf::size_type num_structs = 0; + for (int i = 0; i < num_rows; ++i) { + offsets.push_back(num_structs); + auto const new_list_size = i % 4; + num_structs += new_list_size; + } + offsets.push_back(num_structs); + + auto const make_structs_col = [=] { + auto child1 = int32s_col(int_iter, int_iter + num_structs); + auto child2 = int32s_col(int_iter + num_structs, int_iter + num_structs * 2); + + auto const str_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) { return std::to_string(i) + std::to_string(i) + std::to_string(i); }); + auto child3 = strings_col{str_iter, str_iter + num_structs}; + + return structs_col{{child1, child2, child3}}.release(); + }; + + input_columns.emplace_back( + cudf::make_lists_column(static_cast(offsets.size() - 1), + int32s_col(offsets.begin(), offsets.end()).release(), + make_structs_col(), + 0, + rmm::device_buffer{})); + + return write_file(input_columns, + "chunked_read_with_lists_of_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + }; + + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + 
EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // for these tests, different columns get written to different numbers of pages so it's a + // little tricky to describe the expected results by page counts. To get an idea of how + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // reader_impl_preprocess.cu -> find_splits() + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + EXPECT_EQ(num_chunks, 7); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } +} + +TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) +{ + auto constexpr num_rows = 100'000; + + auto const sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 1; }); + auto const validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 4 != 3; }); + cudf::test::fixed_width_column_wrapper col{sequence, sequence + num_rows, validity}; + std::vector> cols; + cols.push_back(col.release()); + auto const expected = std::make_unique(std::move(cols)); + + auto const filepath = temp_env->get_temp_filepath("chunked_reader_null_count.parquet"); + auto const page_limit_rows = num_rows / 5; + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) + .max_page_size_rows(page_limit_rows) // 20k rows per page + .build(); + cudf::io::write_parquet(write_opts); + + auto const byte_limit = 
page_limit_rows * sizeof(int); + auto const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); + + do { + // Every fourth row is null + EXPECT_EQ(reader.read_chunk().tbl->get_column(0).null_count(), page_limit_rows / 4); + } while (reader.has_next()); +} + +TEST_F(OrcChunkedReaderTest, InputLimitSimple) +{ + auto const filepath = temp_env->get_temp_filepath("input_limit_10_rowgroups.parquet"); + + // This results in 10 grow groups, at 4001150 bytes per row group + constexpr int num_rows = 25'000'000; + auto value_iter = cudf::detail::make_counting_transform_iterator(0, [](int i) { return i; }); + cudf::test::fixed_width_column_wrapper expected(value_iter, value_iter + num_rows); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, + cudf::table_view{{expected}}) + // note: it is unnecessary to force compression to NONE here because the size we are using in + // the row group is the uncompressed data size. But forcing the dictionary policy to + // dictionary_policy::NEVER is necessary to prevent changes in the + // decompressed-but-not-yet-decoded data. + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + + cudf::io::write_parquet(opts); + + { + // no chunking + auto const [result, num_chunks] = chunked_read(filepath, 0, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 1); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 4000000); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 4100000); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 12 chunks of 200k rows each, plus 1 final chunk of 100k rows. + auto const [result, num_chunks] = chunked_read(filepath, 0, 8002301); + EXPECT_EQ(num_chunks, 13); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 1 big chunk + auto const [result, num_chunks] = chunked_read(filepath, 0, size_t{1} * 1024 * 1024 * 1024); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } +} +#endif From 2488cb2479b043f9500f46de607b409f93fa33da Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 19:28:09 -0800 Subject: [PATCH 114/321] Add comment Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index af959b78af8..8db36998311 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -831,6 +831,7 @@ void reader::impl::load_data() // DEBUG only _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; + // TODO: only decode stripes enough for output. 
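  // For illustration of the splitting below (hypothetical numbers, not taken from this
  // patch): three loaded stripes with decompressed sizes {100 MB, 80 MB, 120 MB} have
  // cumulative sizes {100, 180, 300} MB; a data_read_limit of 200 MB then makes
  // find_splits() cut the decoding work into two chunks, stripes [0, 2) and [2, 3).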
_chunk_read_data.decode_stripe_chunks = find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { From e3db4dcea9093151c9865512f3d090ff038d3df9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 20:38:32 -0800 Subject: [PATCH 115/321] Add `output_row_granularity` parameter Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 9 +++++++ cpp/include/cudf/io/orc.hpp | 14 ++++++++++ cpp/src/io/functions.cpp | 24 +++++++++++++++++ cpp/src/io/orc/reader.cu | 19 ++++++++++++++ cpp/src/io/orc/reader_impl.cu | 35 +++++++++++++++++++------ cpp/src/io/orc/reader_impl.hpp | 10 +++++++ cpp/src/io/orc/reader_impl_chunking.hpp | 10 +++++-- 7 files changed, 111 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index ff748e63506..ac024caf1f3 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -102,6 +102,7 @@ class chunked_reader : private reader { * whole file and return a table containing all rows. * * TODO: data read limit + * TODO: granularity * * @param output_size_limit Limit on total number of bytes to be returned per read, * or `0` if there is no limit @@ -119,6 +120,14 @@ class chunked_reader : private reader { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + explicit chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Destructor explicitly-declared to avoid inlined in header. * diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 9af86cee6d7..cfab642f25d 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -423,6 +423,12 @@ class chunked_orc_reader { */ chunked_orc_reader() = default; + // TODO + chunked_orc_reader(std::size_t output_size_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Constructor for chunked reader. * @@ -446,6 +452,14 @@ class chunked_orc_reader { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + // TODO + chunked_orc_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + orc_reader_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Destructor, destroying the internal reader instance. 
* diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 2f3f57cc2d1..04799fabeef 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -451,13 +451,37 @@ void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream) /** * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader */ +chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr) +{ +} + +chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader{std::make_unique(output_size_limit, + data_read_limit, + make_datasources(options.get_source()), + options, + stream, + mr)} +{ +} + chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, std::size_t data_read_limit, + size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader{std::make_unique(output_size_limit, data_read_limit, + output_row_granularity, make_datasources(options.get_source()), options, stream, diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 855a96c9ae3..4d285e6788d 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -42,11 +42,30 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + : reader() // TODO { _impl = std::make_unique( output_size_limit, data_read_limit, std::move(sources), options, stream, mr); } +chunked_reader::chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader() // TODO +{ + _impl = std::make_unique(output_size_limit, + data_read_limit, + output_row_granularity, + std::move(sources), + options, + stream, + mr); +} + chunked_reader::~chunked_reader() = default; bool chunked_reader::has_next() const { return _impl->has_next(); } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index ae46b3c6d48..75e743df9a4 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -729,13 +729,13 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ * @return */ std::vector find_table_splits(table_view const& input, + size_type segment_length, std::size_t size_limit, rmm::cuda_stream_view stream) { // Default 10k rows. 
- size_type constexpr SEGMENT_SIZE = 10'000; - auto const d_segmented_sizes = cudf::detail::segmented_bit_count( - input, SEGMENT_SIZE, stream, rmm::mr::get_current_device_resource()); + auto const d_segmented_sizes = cudf::detail::segmented_bit_count( + input, segment_length, stream, rmm::mr::get_current_device_resource()); auto const d_size_begin = d_segmented_sizes->view().begin(); auto segmented_sizes = @@ -746,8 +746,8 @@ std::vector find_table_splits(table_view const& input, d_size_begin, d_size_begin + d_segmented_sizes->size(), segmented_sizes.d_begin(), - [SEGMENT_SIZE] __device__(auto const size) { - return cumulative_size{SEGMENT_SIZE, static_cast(size)}; + [segment_length] __device__(auto const size) { + return cumulative_size{segment_length, static_cast(size)}; }); // TODO: exec_policy_nosync thrust::inclusive_scan(rmm::exec_policy(stream), @@ -1236,8 +1236,10 @@ void reader::impl::decompress_and_decode() // DEBUG only _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; - _chunk_read_data.output_table_chunks = - find_table_splits(_decoded_table->view(), _chunk_read_data.output_size_limit, _stream); + _chunk_read_data.output_table_chunks = find_table_splits(_decoded_table->view(), + _chunk_read_data.output_row_granularity, + _chunk_read_data.output_size_limit, + _stream); _chunk_read_data.curr_output_table_chunk = 0; auto& splits = _chunk_read_data.output_table_chunks; @@ -1413,6 +1415,23 @@ reader::impl::impl(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + : reader::impl::impl(output_size_limit, + data_read_limit, + DEFAULT_OUTPUT_ROW_GRANULARITY, + std::move(sources), + options, + stream, + mr) +{ +} + +reader::impl::impl(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) : _stream(stream), _mr(mr), _config{options.get_timestamp_type(), @@ -1423,7 +1442,7 @@ reader::impl::impl(std::size_t output_size_limit, _sources(std::move(sources)), _metadata{_sources, stream}, _selected_columns{_metadata.select_columns(options.get_columns())}, - _chunk_read_data{output_size_limit, data_read_limit} + _chunk_read_data{output_size_limit, data_read_limit, output_row_granularity} { } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b94f639c05d..e6764d6d688 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -63,6 +63,14 @@ class reader::impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + explicit impl(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Read an entire set or a subset of data and returns a set of columns * @@ -178,6 +186,8 @@ class reader::impl { table_metadata _out_metadata; std::vector> _out_buffers; std::unique_ptr _decoded_table; + + static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000; }; } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index d0996bcdde3..47b6ae7a02e 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -201,14 +201,20 @@ struct 
file_intermediate_data { * @brief Struct to store all data necessary for chunked reading. */ struct chunk_read_data { - explicit chunk_read_data(std::size_t output_size_limit_ = 0, std::size_t data_read_limit_ = 0) - : output_size_limit{output_size_limit_}, data_read_limit(data_read_limit_) + explicit chunk_read_data(std::size_t output_size_limit_, + std::size_t data_read_limit_, + size_type output_row_granularity_) + : output_size_limit{output_size_limit_}, + data_read_limit{data_read_limit_}, + output_row_granularity{output_row_granularity_} { } + // TODO: const for 3 below? std::size_t output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit std::size_t data_read_limit; // approximate maximum size (in bytes) used for store // intermediate data, or 0 for no limit + size_type output_row_granularity; // TODO // Chunks of stripes that can be load into memory such that their data size is within a size // limit. From 94d66ad8129d1ab14e75a44d8696668ab9f3443c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 28 Feb 2024 22:04:19 -0800 Subject: [PATCH 116/321] Fix segment length Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++++ cpp/tests/io/orc_chunked_reader_test.cpp | 17 +++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 75e743df9a4..19d433c04f6 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -733,6 +733,10 @@ std::vector find_table_splits(table_view const& input, std::size_t size_limit, rmm::cuda_stream_view stream) { + // If segment_length is zero: we don't have any limit on granularity. + // As such, set segment length to the number of rows. + if (segment_length == 0) { segment_length = input.num_rows(); } + // Default 10k rows. auto const d_segmented_sizes = cudf::detail::segmented_bit_count( input, segment_length, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index eecadcc1e05..e6a7d3fcb36 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -38,10 +38,10 @@ #include #include -#include - #include +#include + #include #include @@ -96,11 +96,15 @@ auto write_file(std::vector>& input_columns, return std::pair{std::move(input_table), std::move(filepath)}; } -auto chunked_read(std::string const& filepath, std::size_t output_limit) +auto chunked_read(std::string const& filepath, + std::size_t output_limit, + std::size_t input_limit = 0, + cudf::size_type output_row_granularity = 0) { auto const read_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = cudf::io::chunked_orc_reader(output_limit, read_opts); + auto reader = + cudf::io::chunked_orc_reader(output_limit, input_limit, output_row_granularity, read_opts); auto num_chunks = 0; auto out_tables = std::vector>{}; @@ -142,7 +146,6 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } -#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; @@ -169,9 +172,11 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } -} + exit(0); +} +#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) { // Tests some specific boundary conditions in the split calculations. 
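For reference, the grouping rule that `find_splits` and `find_table_splits` apply
throughout these patches can be sketched in plain host code. This is an
illustrative simplification only: the real implementation computes per-segment
sizes on the device (via `segmented_row_bit_count`), prefix-sums them with
`thrust::inclusive_scan`, and searches the cumulative sizes, but the grouping
behavior is the same. The `split` struct and `split_by_size` function below are
hypothetical names, not part of the patch.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct split {
      int64_t start_row;
      int64_t num_rows;
    };

    // Group fixed-length row segments into chunks whose total byte size stays
    // at or under `limit`. Each chunk keeps at least one segment, so a tiny
    // limit (e.g. 1 byte) degenerates to one segment per chunk.
    std::vector<split> split_by_size(std::vector<std::size_t> const& segment_sizes,
                                     int64_t segment_length,
                                     int64_t num_rows,
                                     std::size_t limit)
    {
      if (limit == 0 || segment_sizes.empty()) { return {split{0, num_rows}}; }

      std::vector<split> splits;
      int64_t start_row       = 0;
      std::size_t chunk_bytes = 0;
      for (std::size_t i = 0; i < segment_sizes.size(); ++i) {
        auto const seg_start = static_cast<int64_t>(i) * segment_length;
        // Close the current chunk before it would exceed the limit.
        if (chunk_bytes > 0 && chunk_bytes + segment_sizes[i] > limit) {
          splits.push_back(split{start_row, seg_start - start_row});
          start_row   = seg_start;
          chunk_bytes = 0;
        }
        chunk_bytes += segment_sizes[i];
      }
      splits.push_back(split{start_row, num_rows - start_row});
      return splits;
    }

For example, with four 10'000-row segments of 40'000 bytes each, a limit of
80'000 yields two chunks of 20'000 rows, while 79'999 yields one chunk per
segment -- the behavior the boundary-case tests later in this series assert.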
From e270aa38c1b7e1602e5dad6272bde75901d00219 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:13:32 -0800 Subject: [PATCH 117/321] Use chunking for chunked reader Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 89 ++++++++----------------- cpp/src/io/orc/reader_impl.hpp | 1 - cpp/src/io/orc/reader_impl_chunking.cu | 22 +++--- cpp/src/io/orc/reader_impl_chunking.hpp | 3 - 4 files changed, 40 insertions(+), 75 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 19d433c04f6..4e0935d908d 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1235,19 +1235,23 @@ void reader::impl::decompress_and_decode() orc_col_meta.id, 0, *_col_meta, _metadata, _selected_columns, _out_buffers, _stream, _mr); return make_column(col_buffer, &_out_metadata.schema_info.back(), std::nullopt, _stream); }); - _decoded_table = std::make_unique
<table>(std::move(out_columns));
+  _chunk_read_data.decoded_table = std::make_unique<table>
(std::move(out_columns)); // DEBUG only - _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; + // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; - _chunk_read_data.output_table_chunks = find_table_splits(_decoded_table->view(), - _chunk_read_data.output_row_granularity, - _chunk_read_data.output_size_limit, - _stream); _chunk_read_data.curr_output_table_chunk = 0; + _chunk_read_data.output_table_chunks = + _chunk_read_data.output_size_limit == 0 + ? std::vector{chunk{0, _chunk_read_data.decoded_table->num_rows()}} + : find_table_splits(_chunk_read_data.decoded_table->view(), + _chunk_read_data.output_row_granularity, + _chunk_read_data.output_size_limit, + _stream); auto& splits = _chunk_read_data.output_table_chunks; - printf("------------\nSplits (/total num rows = %d): \n", (int)_decoded_table->num_rows()); + printf("------------\nSplits decoded table (/total num rows = %d): \n", + (int)_chunk_read_data.decoded_table->num_rows()); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); } @@ -1268,16 +1272,18 @@ void reader::impl::prepare_data(int64_t skip_rows, global_preprocess(skip_rows, num_rows_opt, stripes); - // TODO: only load data if there is no loaded stripe ready to decode. - // load_data(); - while (_chunk_read_data.more_stripe_to_load()) { - load_data(); + if (!_chunk_read_data.more_table_chunk_to_output()) { + if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) { + printf("load more data\n\n"); + load_data(); + } - while (_chunk_read_data.more_stripe_to_decode()) { + if (_chunk_read_data.more_stripe_to_decode()) { + printf("decode more data\n\n"); decompress_and_decode(); - _file_itm_data.out_tables.push_back(std::move(_decoded_table)); } } + printf("done load and decode data\n\n"); // decompress_and_decode(); @@ -1293,7 +1299,7 @@ table_with_metadata reader::impl::make_output_chunk() if (_selected_columns.num_levels() == 0) { return {std::make_unique
<table>(), table_metadata{}}; }

   // If no rows or stripes to read, return empty columns
-  if (_file_itm_data.has_no_data() /*|| !_chunk_read_data.has_next()*/) {
+  if (_file_itm_data.has_no_data() || !_chunk_read_data.more_table_chunk_to_output()) {
     printf("has no next\n");
     std::vector<std::unique_ptr<column>> out_columns;
     auto out_metadata = get_meta_with_user_data();
@@ -1313,50 +1319,7 @@ table_with_metadata reader::impl::make_output_chunk()
     return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)}; }

-  std::vector<table_view> tv;
-
-  for (auto& table : _file_itm_data.out_tables) {
-    tv.push_back(table->view());
-
-    //
-    printf(" ----- decode one chunk, size = %d\n", tv.back().num_rows());
-    fflush(stdout);
-    //
-    //
-    //
-    //
-  }
-  printf(" ----- decode total %d chunks\n", (int)tv.size());
-  fflush(stdout);
-
-  // todo: remove this
-  // auto out_table = std::make_unique<table>
(std::move(out_columns));
-  auto out_table = [&] {
-    if (tv.size() > 1) {
-      auto tmp = cudf::concatenate(tv);
-      std::vector<bool> has_mask(tmp->num_columns(), false);
-      std::vector<bool> has_nulls(tmp->num_columns(), false);
-
-      for (int i = 0; i < tmp->num_columns(); ++i) {
-        for (int j = 0; j < (int)tv.size(); ++j) {
-          if (tv[j].column(i).nullable()) { has_mask[i] = true; }
-          if (tv[j].column(i).null_count()) { has_nulls[i] = true; }
-        }
-      }
-      for (int i = 0; i < tmp->num_columns(); ++i) {
-        if (has_mask[i] && !has_nulls[i]) {
-          tmp->get_column(i).set_null_mask(
-            cudf::create_null_mask(tmp->get_column(i).size(), cudf::mask_state::ALL_VALID), 0);
-        }
-      }
-
-      return tmp;
-    }
-    return std::move(_file_itm_data.out_tables.front());
-  }();
-  // auto out_table = std::move(tabs.front());
-
-#if 0
+#if 1
   auto out_table = [&] {
     if (_chunk_read_data.output_table_chunks.size() == 1) {
       return std::move(_chunk_read_data.decoded_table);
     }

     auto const out_chunk =
       _chunk_read_data.output_table_chunks[_chunk_read_data.curr_output_table_chunk++];
     auto const out_tview =
-      cudf::slice(_chunk_read_data.decoded_table->view(),
-                  {static_cast<size_type>(out_chunk.start_idx),
-                   static_cast<size_type>(out_chunk.start_idx + out_chunk.count)},
-                  _stream)[0];
-    return std::make_unique<table>
(out_tview);
+      cudf::detail::slice(_chunk_read_data.decoded_table->view(),
+                          {static_cast<size_type>(out_chunk.start_idx),
+                           static_cast<size_type>(out_chunk.start_idx + out_chunk.count)},
+                          _stream)[0];
+    return std::make_unique<table>
(out_tview, _stream, _mr); }(); #endif diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index e6764d6d688..9ca003672a4 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -185,7 +185,6 @@ class reader::impl { std::unique_ptr _meta_with_user_data; table_metadata _out_metadata; std::vector> _out_buffers; - std::unique_ptr _decoded_table; static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000; }; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 8db36998311..2fe8f6753f1 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -311,7 +311,10 @@ std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit) { - // if (size_limit == 0) { return {chunk{0, total_count}}; } + // if (size_limit == 0) { + // printf("0 limit: output chunk = 0, %d\n", (int)total_count); + // return {chunk{0, total_count}}; + // } CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); std::vector splits; @@ -592,10 +595,13 @@ void reader::impl::global_preprocess(uint64_t skip_rows, chunk{last_read_size, static_cast(read_info.size() - last_read_size)}; } + _chunk_read_data.curr_load_stripe_chunk = 0; + // Load all chunks if there is no read limit. if (_chunk_read_data.data_read_limit == 0) { + printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; - // return; + return; } printf("total stripe sizes:\n"); @@ -620,7 +626,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // DEBUG only // TODO: use 0.3 constant - _chunk_read_data.data_read_limit = total_stripe_sizes.back().size_bytes / 3; + // _chunk_read_data.data_read_limit = total_stripe_sizes.back().size_bytes / 3; _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); @@ -811,11 +817,14 @@ void reader::impl::load_data() } // end loop level + // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. + _chunk_read_data.curr_decode_stripe_chunk = 0; + // Decode all chunks if there is no read limit. if (_chunk_read_data.data_read_limit == 0) { _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; // TODO: DEBUG only - // return; + return; } // Compute the prefix sum of stripe data sizes. @@ -829,7 +838,7 @@ void reader::impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); // DEBUG only - _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; + // _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; // TODO: only decode stripes enough for output. _chunk_read_data.decode_stripe_chunks = @@ -865,9 +874,6 @@ void reader::impl::load_data() // lvl_stripe_data.clear(); // _file_itm_data.compinfo_ready = true; - - // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. - _chunk_read_data.curr_decode_stripe_chunk = 0; } } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 47b6ae7a02e..cc37ac585a3 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -122,9 +122,6 @@ struct range { * @brief Struct to store file-level data that remains constant for all chunks being output. 
*/ struct file_intermediate_data { - // TODO: remove - std::vector> out_tables; - int64_t rows_to_skip; size_type rows_to_read; std::vector selected_stripes; From b307b802d0535c84f061afa66629eba67bb21f0a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:27:28 -0800 Subject: [PATCH 118/321] Fix bug in chunking Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4e0935d908d..d7b7bc47e13 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1322,6 +1322,7 @@ table_with_metadata reader::impl::make_output_chunk() #if 1 auto out_table = [&] { if (_chunk_read_data.output_table_chunks.size() == 1) { + _chunk_read_data.curr_output_table_chunk++; return std::move(_chunk_read_data.decoded_table); } From fcdc9c1d89e1cacb67a165f611b072a8ecb9b599 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:30:48 -0800 Subject: [PATCH 119/321] Add debug info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d7b7bc47e13..68b85941709 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -733,6 +733,8 @@ std::vector find_table_splits(table_view const& input, std::size_t size_limit, rmm::cuda_stream_view stream) { + printf("find table split, seg length = %d, limit = %d \n", segment_length, (int)size_limit); + // If segment_length is zero: we don't have any limit on granularity. // As such, set segment length to the number of rows. if (segment_length == 0) { segment_length = input.num_rows(); } @@ -753,6 +755,14 @@ std::vector find_table_splits(table_view const& input, [segment_length] __device__(auto const size) { return cumulative_size{segment_length, static_cast(size)}; }); + + // TODO: remove: + segmented_sizes.device_to_host_sync(stream); + printf("total row sizes by segment = %d:\n", (int)segment_length); + for (auto& size : segmented_sizes) { + printf("size: %ld, %zu\n", size.count, size.size_bytes); + } + // TODO: exec_policy_nosync thrust::inclusive_scan(rmm::exec_policy(stream), segmented_sizes.d_begin(), @@ -1412,6 +1422,13 @@ reader::impl::impl(std::size_t output_size_limit, _selected_columns{_metadata.select_columns(options.get_columns())}, _chunk_read_data{output_size_limit, data_read_limit, output_row_granularity} { + printf("construct reader , limit = %d, %d, gradunarity %d \n", + + (int)output_size_limit, + (int)data_read_limit, + (int)output_row_granularity + + ); } table_with_metadata reader::impl::read(int64_t skip_rows, From 915a3fcd55548d9c09c67024d6bbe79423f9840f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:37:21 -0800 Subject: [PATCH 120/321] Fix a bug in setting row granularity Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 68b85941709..1ca175b56b5 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1420,7 +1420,10 @@ reader::impl::impl(std::size_t output_size_limit, _sources(std::move(sources)), _metadata{_sources, stream}, _selected_columns{_metadata.select_columns(options.get_columns())}, - _chunk_read_data{output_size_limit, data_read_limit, output_row_granularity} + _chunk_read_data{ + output_size_limit, + 
data_read_limit, + output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} { printf("construct reader , limit = %d, %d, gradunarity %d \n", From de4a365274634fba9dd278b0695e0969e7d637ce Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:41:42 -0800 Subject: [PATCH 121/321] Fix test Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/tests/io/orc_chunked_reader_test.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 1ca175b56b5..b88dc361dc3 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -760,7 +760,7 @@ std::vector find_table_splits(table_view const& input, segmented_sizes.device_to_host_sync(stream); printf("total row sizes by segment = %d:\n", (int)segment_length); for (auto& size : segmented_sizes) { - printf("size: %ld, %zu\n", size.count, size.size_bytes); + printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT); } // TODO: exec_policy_nosync diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index e6a7d3fcb36..05fe45c631d 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -161,19 +161,17 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) { auto const [expected, filepath] = generate_input(false); - auto const [result, num_chunks] = chunked_read(filepath, 240'000); + auto const [result, num_chunks] = chunked_read(filepath, 245'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { auto const [expected, filepath] = generate_input(true); - auto const [result, num_chunks] = chunked_read(filepath, 240'000); + auto const [result, num_chunks] = chunked_read(filepath, 245'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - - exit(0); } #if 0 From 119002ed82d2856110246625c4c7396373bb90c4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 13:48:11 -0800 Subject: [PATCH 122/321] Improve tests Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++++ cpp/tests/io/orc_chunked_reader_test.cpp | 26 +++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index b88dc361dc3..0b8a7a61226 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1444,12 +1444,16 @@ table_with_metadata reader::impl::read(int64_t skip_rows, bool reader::impl::has_next() { + printf("==================query has next \n"); prepare_data(); + + printf("has next: %d\n", (int)_chunk_read_data.has_next()); return _chunk_read_data.has_next(); } table_with_metadata reader::impl::read_chunk() { + printf("==================call read chunk\n"); prepare_data(); return make_output_chunk(); } diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 05fe45c631d..40b0313ac14 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -150,31 +150,46 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; - auto const generate_input = [num_rows](bool nullable) { + auto const generate_input = [num_rows](bool nullable, std::size_t stripe_rows) { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + 
num_rows).release()); input_columns.emplace_back(int64s_col(value_iter, value_iter + num_rows).release()); - return write_file(input_columns, "chunked_read_simple", nullable); + return write_file(input_columns, + "chunked_read_simple", + nullable, + cudf::io::default_stripe_size_bytes, + stripe_rows); }; { - auto const [expected, filepath] = generate_input(false); + auto const [expected, filepath] = generate_input(false, 1'000); + auto const [result, num_chunks] = chunked_read(filepath, 245'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + { + auto const [expected, filepath] = generate_input(false, cudf::io::default_stripe_size_rows); auto const [result, num_chunks] = chunked_read(filepath, 245'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [expected, filepath] = generate_input(true); + auto const [expected, filepath] = generate_input(true, 1'000); + auto const [result, num_chunks] = chunked_read(filepath, 245'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + { + auto const [expected, filepath] = generate_input(true, cudf::io::default_stripe_size_rows); auto const [result, num_chunks] = chunked_read(filepath, 245'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } -#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) { // Tests some specific boundary conditions in the split calculations. @@ -259,6 +274,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) } } +#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) { auto constexpr num_rows = 60'000; From 818cfb7337ea5dc146107b604522e4a5a31c8990 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 14:12:58 -0800 Subject: [PATCH 123/321] Implement adaptive size limit for decoding Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2fe8f6753f1..859671c184c 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -628,6 +628,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // TODO: use 0.3 constant // _chunk_read_data.data_read_limit = total_stripe_sizes.back().size_bytes / 3; + // TODO: handle case for extremely large files. + _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); @@ -820,8 +822,8 @@ void reader::impl::load_data() // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. _chunk_read_data.curr_decode_stripe_chunk = 0; - // Decode all chunks if there is no read limit. - if (_chunk_read_data.data_read_limit == 0) { + // Decode all chunks if there is no read and no output limit. + if (_chunk_read_data.data_read_limit == 0 && _chunk_read_data.output_size_limit == 0) { _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; // TODO: DEBUG only return; @@ -840,9 +842,19 @@ void reader::impl::load_data() // DEBUG only // _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; - // TODO: only decode stripes enough for output. + // TODO: Check and turn this 1.0. + // If there is no read limit, we still do not decode all stripes. 
+ // Typically, the limit below will result in a very large number of stripes + // since their data is compressed to be much smaller than the actual data. + // However, it is still better than decoding all stripes, which may be a huge number. + auto const decode_size_limit = _chunk_read_data.data_read_limit > 0 + ? _chunk_read_data.data_read_limit + : _chunk_read_data.output_size_limit; + + printf("decode size limit: %d\n", (int)decode_size_limit); + _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_size_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } From bce6e8db3b02ac3d22107a7d4e174d359707f3fa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 16:27:49 -0800 Subject: [PATCH 124/321] Update `row_bit_count.cu` Signed-off-by: Nghia Truong --- cpp/include/cudf/detail/transform.hpp | 10 ++--- cpp/include/cudf/transform.hpp | 25 ++++++++++- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/transform/row_bit_count.cu | 62 ++++++++++++++------------- 4 files changed, 62 insertions(+), 37 deletions(-) diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 0ce7037b9e8..965fea84860 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -101,14 +101,14 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr); /** - * @copydoc cudf::segmented_bit_count + * @copydoc cudf::segmented_row_bit_count * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr segmented_bit_count(table_view const& t, - size_type segment_length, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 412fe17ef26..49ec3d7c0d5 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -224,5 +224,28 @@ std::unique_ptr row_bit_count( table_view const& t, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for + * each segment of rows. + * + * This is similar to counting bit size per row for the input table in `cudf::row_bit_count`, + * except that row sizes are accumulated by segments. + * + * Currently, only fixed-length segments are supported. In case the input table has number of rows + * not divisible by `segment_length`, its last segment is considered as shorter than the others. + * + * @throw std::invalid_argument if the input `segment_length` is non-positive or larger than the + * number of rows in the input table. 
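+ *
+ * A brief usage sketch (illustrative; assumes a table `t` with 100 rows):
+ * @code
+ * auto const sizes = cudf::segmented_row_bit_count(t, 32);
+ * // `sizes` holds 4 entries: three full 32-row segments plus a final 4-row
+ * // segment; each entry is that segment's total size in bits.
+ * @endcode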
+ * + * @param t The table view to perform the computation on + * @param segment_length The number of rows in each segment for which the total size is computed + * @param mr Device memory resource used to allocate the returned columns' device memory + * @return A 32-bit integer column containing the bit counts for each segment of rows + */ +std::unique_ptr segmented_row_bit_count( + table_view const& t, + size_type segment_length, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 0b8a7a61226..974043b78db 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -740,7 +740,7 @@ std::vector find_table_splits(table_view const& input, if (segment_length == 0) { segment_length = input.num_rows(); } // Default 10k rows. - auto const d_segmented_sizes = cudf::detail::segmented_bit_count( + auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( input, segment_length, stream, rmm::mr::get_current_device_resource()); auto const d_size_begin = d_segmented_sizes->view().begin(); diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 8c0a805b00f..10260df8fb1 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -35,7 +35,7 @@ #include #include #include -#include +#include namespace cudf { namespace detail { @@ -404,11 +404,11 @@ __device__ size_type row_size_functor::operator()(column_device_vie * @param segment_length The number of rows in each segment for which the total size is computed * @param max_branch_depth Maximum depth of the span stack needed per-thread */ -CUDF_KERNEL void compute_row_sizes(device_span cols, - device_span info, - device_span output, - size_type segment_length, - size_type max_branch_depth) +CUDF_KERNEL void compute_segment_sizes(device_span cols, + device_span info, + device_span output, + size_type segment_length, + size_type max_branch_depth) { extern __shared__ row_span thread_branch_stacks[]; int const tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -422,8 +422,11 @@ CUDF_KERNEL void compute_row_sizes(device_span cols, size_type branch_depth{0}; // current row span - always starts at spanning over `segment_length` rows. 
- auto const num_rows = cols[0].size(); - row_span cur_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)}; + auto const num_rows = cols[0].size(); + auto const get_default_row_span = [=] { + return row_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)}; + }; + auto cur_span = get_default_row_span(); // output size size_type& size = output[tid]; @@ -450,8 +453,7 @@ CUDF_KERNEL void compute_row_sizes(device_span cols, if (info[idx].depth == 0) { branch_depth = 0; last_branch_depth = 0; - cur_span = - row_span{tid * segment_length, cuda::std::min((tid + 1) * segment_length, num_rows)}; + cur_span = get_default_row_span(); } // add the contributing size of this row @@ -472,16 +474,18 @@ CUDF_KERNEL void compute_row_sizes(device_span cols, } // anonymous namespace -std::unique_ptr segmented_bit_count(table_view const& t, - size_type segment_length, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(segment_length >= 1, "Invalid segment length.", std::invalid_argument); - - // no rows + // If there is no rows, segment_length will not be checked. if (t.num_rows() <= 0) { return cudf::make_empty_column(type_id::INT32); } + CUDF_EXPECTS(segment_length >= 1 && segment_length <= t.num_rows(), + "Invalid segment length.", + std::invalid_argument); + // flatten the hierarchy and determine some information about it. std::vector cols; std::vector info; @@ -498,11 +502,10 @@ std::unique_ptr segmented_bit_count(table_view const& t, // simple case. if we have no complex types (lists, strings, etc), the per-row size is already // trivially computed if (h_info.complex_type_count <= 0) { - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_segments), + thrust::tabulate( + rmm::exec_policy_nosync(stream), mcv.begin(), + mcv.end(), cuda::proclaim_return_type( [segment_length, num_segments, @@ -510,9 +513,8 @@ std::unique_ptr segmented_bit_count(table_view const& t, per_row_size = h_info.simple_per_row_size] __device__(size_type const segment_idx) { // Since the number of rows may not divisible by segment_length, // the last segment may be shorter than the others. - auto const current_length = segment_idx + 1 < num_segments - ? 
segment_length - : num_rows - segment_length * segment_idx; + auto const current_length = + cuda::std::min(segment_length, num_rows - segment_length * segment_idx); return per_row_size * current_length; })); return output; @@ -544,7 +546,7 @@ std::unique_ptr segmented_bit_count(table_view const& t, CUDF_EXPECTS(block_size > 0, "Encountered a column hierarchy too complex for row_bit_count"); cudf::detail::grid_1d grid{num_segments, block_size, 1}; - compute_row_sizes<<>>( + compute_segment_sizes<<>>( {std::get<1>(d_cols), cols.size()}, {d_info.data(), info.size()}, {mcv.data(), static_cast(mcv.size())}, @@ -558,17 +560,17 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return segmented_bit_count(t, 1, stream, mr); + return segmented_row_bit_count(t, 1, stream, mr); } } // namespace detail -std::unique_ptr segmented_bit_count(table_view const& t, - size_type segment_length, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_row_bit_count(table_view const& t, + size_type segment_length, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_bit_count(t, segment_length, cudf::get_default_stream(), mr); + return detail::segmented_row_bit_count(t, segment_length, cudf::get_default_stream(), mr); } std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr) From f6fc6f06067c38972aeb4c9f716d8593ef1fb387 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 16:31:51 -0800 Subject: [PATCH 125/321] Fix caller to `segmented_row_bit_count` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 974043b78db..ca7e1605bfc 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -739,6 +739,10 @@ std::vector find_table_splits(table_view const& input, // As such, set segment length to the number of rows. if (segment_length == 0) { segment_length = input.num_rows(); } + // If we have small number of rows, need to adjust segment_length before calling to + // `segmented_row_bit_count`. + segment_length = std::min(segment_length, input.num_rows()); + // Default 10k rows. auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( input, segment_length, stream, rmm::mr::get_current_device_resource()); From bd198dc71a4a4c444215f96bea4a060b6d43c0c4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 17:01:29 -0800 Subject: [PATCH 126/321] Remove adaptive size for decoding Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 +++++++++--------------- cpp/tests/io/orc_chunked_reader_test.cpp | 7 ++++--- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 859671c184c..19aa6eac48a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -763,7 +763,7 @@ void reader::impl::load_data() printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.id.stripe_idx, (int)info.id.level, - (int)info.id.orc_cold_idx, + (int)info.id.orc_col_idx, (int)info.id.kind, info.dst_pos, info.length); @@ -823,9 +823,14 @@ void reader::impl::load_data() _chunk_read_data.curr_decode_stripe_chunk = 0; // Decode all chunks if there is no read and no output limit. 
- if (_chunk_read_data.data_read_limit == 0 && _chunk_read_data.output_size_limit == 0) { + // In theory, we should just decode enough stripes for output one table chunk. + // However, we do not know the output size of each stripe after decompressing and decoding, + // thus we have to process all loaded chunks. + // That is because the estimated `max_uncompressed_size` of stream data from + // `ParseCompressedStripeData` is just the approximate of the maximum possible size, not the + // actual size, which can be much smaller in practice. + if (_chunk_read_data.data_read_limit == 0) { _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; - // TODO: DEBUG only return; } @@ -842,19 +847,8 @@ void reader::impl::load_data() // DEBUG only // _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; - // TODO: Check and turn this 1.0. - // If there is no read limit, we still do not decode all stripes. - // Typically, the limit below will result in a very large number of stripes - // since their data is compressed to be much smaller than the actual data. - // However, it is still better than decoding all stripes, which may be a huge number. - auto const decode_size_limit = _chunk_read_data.data_read_limit > 0 - ? _chunk_read_data.data_read_limit - : _chunk_read_data.output_size_limit; - - printf("decode size limit: %d\n", (int)decode_size_limit); - _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_size_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 40b0313ac14..12ffaa30a8f 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -62,13 +62,10 @@ auto write_file(std::vector>& input_columns, std::size_t stripe_size_bytes = cudf::io::default_stripe_size_bytes, cudf::size_type stripe_size_rows = cudf::io::default_stripe_size_rows) { - // Just shift nulls of the next column by one position to avoid having all nulls in the same - // table rows. if (nullable) { // Generate deterministic bitmask instead of random bitmask for easy computation of data size. auto const valid_iter = cudf::detail::make_counting_transform_iterator( 0, [](cudf::size_type i) { return i % 4 != 3; }); - cudf::size_type offset{0}; for (auto& col : input_columns) { auto const [null_mask, null_count] = @@ -79,6 +76,10 @@ auto write_file(std::vector>& input_columns, std::move(col), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + + // Shift nulls of the next column by one position, to avoid having all nulls + // in the same table rows. 
+ ++offset; } } From d23591d2f63ea7504d53e0dd8f5975fe4d70cbd6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 17:10:03 -0800 Subject: [PATCH 127/321] Update test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 12ffaa30a8f..6068540438c 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -100,7 +100,7 @@ auto write_file(std::vector>& input_columns, auto chunked_read(std::string const& filepath, std::size_t output_limit, std::size_t input_limit = 0, - cudf::size_type output_row_granularity = 0) + cudf::size_type output_row_granularity = 10'000) { auto const read_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); @@ -214,7 +214,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with a very small limit: 1 byte { auto const [result, num_chunks] = chunked_read(filepath, 1); - EXPECT_EQ(num_chunks, 2); + // Number of chunks is 4 because of using default output_row_granularity=10k. + EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } From a581e96eb5dfb92d0934a3cd1fdfc4d593462ada Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 20:39:55 -0800 Subject: [PATCH 128/321] Fix segment size processing Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index ca7e1605bfc..a7c9d1ab635 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -746,19 +746,26 @@ std::vector find_table_splits(table_view const& input, // Default 10k rows. auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( input, segment_length, stream, rmm::mr::get_current_device_resource()); - auto const d_size_begin = d_segmented_sizes->view().begin(); auto segmented_sizes = cudf::detail::hostdevice_vector(d_segmented_sizes->size(), stream); // TODO: exec_policy_nosync - thrust::transform(rmm::exec_policy(stream), - d_size_begin, - d_size_begin + d_segmented_sizes->size(), - segmented_sizes.d_begin(), - [segment_length] __device__(auto const size) { - return cumulative_size{segment_length, static_cast(size)}; - }); + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_segmented_sizes->size()), + segmented_sizes.d_begin(), + [segment_length, + num_rows = input.num_rows(), + d_sizes = d_segmented_sizes->view().begin()] __device__(auto const segment_idx) { + // Since the number of rows may not divisible by segment_length, + // the last segment may be shorter than the others. 
+ auto const current_length = + cuda::std::min(segment_length, num_rows - segment_length * segment_idx); + auto const size = d_sizes[segment_idx]; + return cumulative_size{current_length, static_cast(size)}; + }); // TODO: remove: segmented_sizes.device_to_host_sync(stream); From 6072ffa0cc0353f14748e477aabdfd7d37034f90 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 20:40:01 -0800 Subject: [PATCH 129/321] Add more test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 78 +++++++++++++++++------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 6068540438c..237ffb43b8b 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -97,6 +97,9 @@ auto write_file(std::vector>& input_columns, return std::pair{std::move(input_table), std::move(filepath)}; } +// NOTE: By default, output_row_granularity=10'000 rows. +// This means if the input file has more than 10k rows then the output chunk will never +// have less than 10k rows. auto chunked_read(std::string const& filepath, std::size_t output_limit, std::size_t input_limit = 0, @@ -204,73 +207,102 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) return write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); }(); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath, 0); + auto const [result, num_chunks] = chunked_read(filepath, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 1); - // Number of chunks is 4 because of using default output_row_granularity=10k. + auto const [result, num_chunks] = chunked_read(filepath, 1UL); + // Number of chunks is 4 because of using default `output_row_granularity = 10k`. EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } + // Test with a very small limit: 1 byte, and small value of `output_row_granularity`. + { + auto const [result, num_chunks] = chunked_read(filepath, 1UL, 0UL, 1'000); + EXPECT_EQ(num_chunks, 40); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte, and large value of `output_row_granularity`. + { + auto const [result, num_chunks] = chunked_read(filepath, 1UL, 0UL, 30'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } // Test with a very large limit { auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } + // Test with a limit slightly less than one granularity segment of data + // (output_row_granularity = 10k rows = 40'000 bytes). + { + auto const [result, num_chunks] = chunked_read(filepath, 39'000UL); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } - // Test with a limit slightly less than one page of data + // Test with a limit exactly the size one granularity segment of data + // (output_row_granularity = 10k rows = 40'000 bytes). 
{ - auto const [result, num_chunks] = chunked_read(filepath, 79'000); - EXPECT_EQ(num_chunks, 2); + auto const [result, num_chunks] = chunked_read(filepath, 40'000UL); + EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit exactly the size one page of data + // Test with a limit slightly more than one granularity segment of data + // (output_row_granularity = 10k rows = 40'000 bytes). { - auto const [result, num_chunks] = chunked_read(filepath, 80'000); - EXPECT_EQ(num_chunks, 2); + auto const [result, num_chunks] = chunked_read(filepath, 41'000UL); + EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit slightly more the size one page of data + // Test with a limit slightly less than two granularity segments of data { - auto const [result, num_chunks] = chunked_read(filepath, 81'000); - EXPECT_EQ(num_chunks, 2); + auto const [result, num_chunks] = chunked_read(filepath, 79'000UL); + EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit slightly less than two pages of data + // Test with a limit exactly the size of two granularity segments of data minus 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 159'000); + auto const [result, num_chunks] = chunked_read(filepath, 79'999UL); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit exactly the size of two granularity segments of data. + { + auto const [result, num_chunks] = chunked_read(filepath, 80'000UL); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit exactly the size of two pages of data minus one byte + // Test with a limit slightly more the size two granularity segments of data. { - auto const [result, num_chunks] = chunked_read(filepath, 159'999); + auto const [result, num_chunks] = chunked_read(filepath, 81'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit exactly the size of two pages of data + // Test with a limit exactly the size of the input minus 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 160'000); - EXPECT_EQ(num_chunks, 1); + auto const [result, num_chunks] = chunked_read(filepath, 159'999UL); + EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a limit slightly more the size two pages of data + // Test with a limit exactly the size of the input. 
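  // (The test table is 40'000 int32 rows, i.e. 160'000 bytes in total.)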
{ - auto const [result, num_chunks] = chunked_read(filepath, 161'000); + auto const [result, num_chunks] = chunked_read(filepath, 160'000UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } From afb4ffaef7fc2a384c8479291aa9601018b3261c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 22:00:19 -0800 Subject: [PATCH 130/321] Add test with strings Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 61 ++++++++++++++---------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 237ffb43b8b..b4549e1433b 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -132,6 +132,13 @@ auto chunked_read(std::string const& filepath, return std::pair(cudf::concatenate(out_tviews), num_chunks); } +auto chunked_read(std::string const& filepath, + std::size_t output_limit, + cudf::size_type output_row_granularity) +{ + return chunked_read(filepath, output_limit, 0UL, output_row_granularity); +} + } // namespace struct OrcChunkedReaderTest : public cudf::test::BaseFixture {}; @@ -224,14 +231,14 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with a very small limit: 1 byte, and small value of `output_row_granularity`. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, 0UL, 1'000); + auto const [result, num_chunks] = chunked_read(filepath, 1UL, 1'000); EXPECT_EQ(num_chunks, 40); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte, and large value of `output_row_granularity`. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, 0UL, 30'000); + auto const [result, num_chunks] = chunked_read(filepath, 1UL, 30'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -308,22 +315,22 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) } } -#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) { - auto constexpr num_rows = 60'000; + auto constexpr num_rows = 60'000; + auto constexpr output_row_granularity = 20'000; auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); - // ints Page total bytes cumulative bytes - // 20000 rows of 4 bytes each = A0 80000 80000 - // 20000 rows of 4 bytes each = A1 80000 160000 - // 20000 rows of 4 bytes each = A2 80000 240000 + // ints Granularity Segment total bytes cumulative bytes + // 20000 rows of 4 bytes each = A0 80000 80000 + // 20000 rows of 4 bytes each = A1 80000 160000 + // 20000 rows of 4 bytes each = A2 80000 240000 input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); - // strings Page total bytes cumulative bytes + // strings Granularity Segment total bytes cumulative bytes // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 @@ -342,42 +349,38 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) // skip_rows / num_rows // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} // byte_limit==1000000 should give 1 chunks: {0, 60000}, - return write_file(input_columns, - "chunked_read_with_strings", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_strings", 
nullable); }; auto const [expected_no_null, filepath_no_null] = generate_input(false); auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + auto const [result, num_chunks] = + chunked_read(filepath_with_nulls, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very large limit + // Test with a very large limit. { auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); @@ -392,28 +395,34 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) // Other tests: { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); + auto const [result, num_chunks] = + chunked_read(filepath_no_null, 500'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); + auto const [result, num_chunks] = + chunked_read(filepath_with_nulls, 500'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } + +#if 0 +#endif } +#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStringPrecise) { auto constexpr num_rows = 60'000; From 4b1665e16f1931deaa5e56b6e7dc4787b8a0f066 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 29 Feb 2024 22:51:46 -0800 Subject: [PATCH 131/321] Add more tests Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 341 +++++++---------------- 1 file changed, 108 insertions(+), 233 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index b4549e1433b..49571c23f0b 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -417,62 +417,12 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - 
-#if 0 -#endif -} - -#if 0 -TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStringPrecise) -{ - auto constexpr num_rows = 60'000; - - auto const generate_input = [num_rows](bool nullable) { - std::vector> input_columns; - - // strings Page total bytes cumulative - // 20000 rows alternating 1-4 chars each (50000 + 80004) A0 130004 130004 - // 20000 rows alternating 1-4 chars each (50000 + 80004) A1 130004 260008 - // ... - auto const strings = std::vector{"a", "bbbb"}; - auto const str_iter = - cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return strings[i % 2]; }); - input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release()); - - // Cumulative sizes: - // A0 : 130004 - // A1 : 260008 - // A2 : 390012 - return write_file(input_columns, - "chunked_read_with_strings_precise", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); - }; - - auto const [expected_no_null, filepath_no_null] = generate_input(false); - - // a chunk limit of 1 byte less than 2 pages should force it to produce 3 chunks: - // each 1 page in size - { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 260'007); - EXPECT_EQ(num_chunks, 3); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); - } - - // a chunk limit of exactly equal to 2 pages should force it to produce 2 chunks - // pages 0-1 and page 2 - { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 260'008); - EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); - } } TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) { - auto constexpr num_rows = 100'000; + auto constexpr num_rows = 100'000; + auto constexpr output_row_granularity = 20'000; auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; @@ -489,49 +439,47 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) return structs_col{{child1, child2, child3}}.release(); }()); - return write_file(input_columns, - "chunked_read_with_structs", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_structs", nullable); }; auto const [expected_no_null, filepath_no_null] = generate_input(false); auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. 
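  // (100'000 rows at output_row_granularity = 20'000 gives 5 segments, so a
  // 1-byte limit degenerates to one segment per chunk.)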
{ - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + auto const [result, num_chunks] = + chunked_read(filepath_with_nulls, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very large limit + // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + auto const [result, num_chunks] = + chunked_read(filepath_no_null, 2L << 40, output_row_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + auto const [result, num_chunks] = + chunked_read(filepath_with_nulls, 2L << 40, output_row_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -539,12 +487,14 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) // Other tests: { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); + auto const [result, num_chunks] = + chunked_read(filepath_no_null, 500'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); + auto const [result, num_chunks] = + chunked_read(filepath_with_nulls, 500'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -552,15 +502,19 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) { - auto constexpr num_rows = 100'000; + auto constexpr num_rows = 100'000; + auto constexpr output_row_granularity = 20'000; auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; - // 20000 rows in 1 page consist of: + // 20000 rows in 1 segment consist of: // // 20001 offsets : 80004 bytes // 30000 ints : 120000 bytes // total : 200004 bytes + // + // However, `segmented_row_bit_count` used in chunked reader returns 200000, + // thus we consider as having only 200000 bytes in total. auto const template_lists = int32s_lists_col{ int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{1, 2}, int32s_lists_col{3, 4, 5}}; @@ -570,59 +524,54 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) input_columns.emplace_back( std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); - return write_file(input_columns, - "chunked_read_with_lists_no_null", - false /*nullable*/, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_lists_no_null", false /*nullable*/); }(); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath, 0); + auto const [result, num_chunks] = chunked_read(filepath, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. 
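// A small worked check of the byte accounting stated above, assuming 4-byte
// offsets and values: each 20'000-row segment carries 20'001 * 4 = 80'004
// offset bytes plus 30'000 * 4 = 120'000 value bytes, and the 1-byte output
// limit below again yields one 20'000-row segment per chunk over 5 segments.
static_assert(20'001 * 4 == 80'004 && 30'000 * 4 == 120'000);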
{ - auto const [result, num_chunks] = chunked_read(filepath, 1); + auto const [result, num_chunks] = chunked_read(filepath, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a very large limit + // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40UL, output_row_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + // Chunk size slightly less than 1 row segment (forcing it to be at least 1 segment per read). { - auto const [result, num_chunks] = chunked_read(filepath, 200'000); + auto const [result, num_chunks] = chunked_read(filepath, 199'999UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size exactly 1 page + // Chunk size exactly 1 row segment. { - auto const [result, num_chunks] = chunked_read(filepath, 200'004); + auto const [result, num_chunks] = chunked_read(filepath, 200'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size 2 pages. 3 chunks (2 pages + 2 pages + 1 page) + // Chunk size == size of 2 segments. Totally have 3 chunks. { - auto const [result, num_chunks] = chunked_read(filepath, 400'008); + auto const [result, num_chunks] = chunked_read(filepath, 400'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size 2 pages minus one byte: each chunk will be just one page + // Chunk size == size of 2 segment minus one byte: each chunk will be just one segment. { - auto const [result, num_chunks] = chunked_read(filepath, 400'007); + auto const [result, num_chunks] = chunked_read(filepath, 399'999UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -630,7 +579,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) { - auto constexpr num_rows = 100'000; + auto constexpr num_rows = 100'000; + auto constexpr output_row_granularity = 20'000; auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; @@ -640,6 +590,9 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) // 20001 offsets : 80004 bytes // 15000 ints : 60000 bytes // total : 142504 bytes + // + // However, `segmented_row_bit_count` used in chunked reader returns 142500, + // thus we consider as having only 142500 bytes in total. auto const template_lists = int32s_lists_col{// these will all be null int32s_lists_col{}, @@ -652,59 +605,54 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) input_columns.emplace_back( std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); - return write_file(input_columns, - "chunked_read_with_lists_nulls", - true /*nullable*/, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_lists_nulls", true /*nullable*/); }(); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. 
{ - auto const [result, num_chunks] = chunked_read(filepath, 0); + auto const [result, num_chunks] = chunked_read(filepath, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 1); + auto const [result, num_chunks] = chunked_read(filepath, 1UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // Test with a very large limit + // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40, output_row_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + // Chunk size slightly less than 1 row segment (forcing it to be at least 1 segment per read). { - auto const [result, num_chunks] = chunked_read(filepath, 142'500); + auto const [result, num_chunks] = chunked_read(filepath, 142'499UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size exactly 1 page + // Chunk size exactly 1 row segment. { - auto const [result, num_chunks] = chunked_read(filepath, 142'504); + auto const [result, num_chunks] = chunked_read(filepath, 142'500UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size 2 pages. 3 chunks (2 pages + 2 pages + 1 page) + // Chunk size == size of 2 segments. Totally have 3 chunks. { - auto const [result, num_chunks] = chunked_read(filepath, 285'008); + auto const [result, num_chunks] = chunked_read(filepath, 285'000UL, output_row_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // chunk size 2 pages minus 1 byte: each chunk will be just one page + // Chunk size == size of 2 segment minus one byte: each chunk will be just one segment. { - auto const [result, num_chunks] = chunked_read(filepath, 285'007); + auto const [result, num_chunks] = chunked_read(filepath, 284'999UL, output_row_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -714,6 +662,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) { auto constexpr num_rows = 100'000; + // Size of each segment (10k row by default) is from 537k to 560k bytes (no nulls) + // and from 456k to 473k (with nulls). auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); @@ -740,42 +690,37 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) return structs_col(std::move(child_columns)).release(); }()); - return write_file(input_columns, - "chunked_read_with_structs_of_lists", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_structs_of_lists", nullable); }; auto const [expected_no_null, filepath_no_null] = generate_input(false); auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. 
{ - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); - EXPECT_EQ(num_chunks, 5); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1UL); + EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very large limit + // Test with a very large limit. { auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); @@ -789,55 +734,50 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) // Other tests: - // for these tests, different columns get written to different numbers of pages so it's a - // little tricky to describe the expected results by page counts. To get an idea of how - // these values are chosen, see the debug output from the call to print_cumulative_row_info() in - // reader_impl_preprocess.cu -> find_splits() - { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); - EXPECT_EQ(num_chunks, 7); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); + EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); - EXPECT_EQ(num_chunks, 4); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000UL); + EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000UL); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000UL); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); - EXPECT_EQ(num_chunks, 5); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000UL); + EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000UL); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const 
[result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -847,6 +787,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) { auto constexpr num_rows = 100'000; + // Size of each segment (10k row by default) is from 450k to 530k bytes (no nulls) + // and from 330k to 380k (with nulls). auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); @@ -880,42 +822,37 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) 0, rmm::device_buffer{})); - return write_file(input_columns, - "chunked_read_with_lists_of_structs", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); + return write_file(input_columns, "chunked_read_with_lists_of_structs", nullable); }; auto const [expected_no_null, filepath_no_null] = generate_input(false); auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); - // Test with zero limit: everything will be read in one chunk + // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very small limit: 1 byte + // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); - EXPECT_EQ(num_chunks, 5); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1UL); + EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Test with a very large limit + // Test with a very large limit. { auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); @@ -927,59 +864,58 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // for these tests, different columns get written to different numbers of pages so it's a - // little tricky to describe the expected results by page counts. To get an idea of how - // these values are chosen, see the debug output from the call to print_cumulative_row_info() in - // reader_impl_preprocess.cu -> find_splits() + // Other tests. 
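// The counts below follow from the per-segment sizes noted above (roughly
// 450k to 530k bytes per 10k-row segment without nulls): a 1'000'000-byte
// output limit mostly fits two adjacent segments per chunk, giving 7 chunks
// for the ten segments, while a 5'000'000-byte limit takes all of them in a
// single chunk.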
+ { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); EXPECT_EQ(num_chunks, 7); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000UL); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); - EXPECT_EQ(num_chunks, 4); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000UL); + EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); - EXPECT_EQ(num_chunks, 2); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000UL); + EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); - EXPECT_EQ(num_chunks, 4); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000UL); + EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); - EXPECT_EQ(num_chunks, 3); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000UL); + EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000UL); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } } +#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) { auto constexpr num_rows = 100'000; @@ -1011,65 +947,4 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) } while (reader.has_next()); } -TEST_F(OrcChunkedReaderTest, InputLimitSimple) -{ - auto const filepath = temp_env->get_temp_filepath("input_limit_10_rowgroups.parquet"); - - // This results in 10 grow groups, at 4001150 bytes per row group - constexpr int num_rows = 25'000'000; - auto value_iter = cudf::detail::make_counting_transform_iterator(0, [](int i) { return i; }); - cudf::test::fixed_width_column_wrapper expected(value_iter, value_iter + num_rows); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, - cudf::table_view{{expected}}) - // note: it is unnecessary to force compression to NONE here because the size we are using in - // the row group is the uncompressed data size. But forcing the dictionary policy to - // dictionary_policy::NEVER is necessary to prevent changes in the - // decompressed-but-not-yet-decoded data. 
- .dictionary_policy(cudf::io::dictionary_policy::NEVER); - - cudf::io::write_parquet(opts); - - { - // no chunking - auto const [result, num_chunks] = chunked_read(filepath, 0, 0); - EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } - - { - // 25 chunks of 100k rows each - auto const [result, num_chunks] = chunked_read(filepath, 0, 1); - EXPECT_EQ(num_chunks, 25); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } - - { - // 25 chunks of 100k rows each - auto const [result, num_chunks] = chunked_read(filepath, 0, 4000000); - EXPECT_EQ(num_chunks, 25); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } - - { - // 25 chunks of 100k rows each - auto const [result, num_chunks] = chunked_read(filepath, 0, 4100000); - EXPECT_EQ(num_chunks, 25); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } - - { - // 12 chunks of 200k rows each, plus 1 final chunk of 100k rows. - auto const [result, num_chunks] = chunked_read(filepath, 0, 8002301); - EXPECT_EQ(num_chunks, 13); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } - - { - // 1 big chunk - auto const [result, num_chunks] = chunked_read(filepath, 0, size_t{1} * 1024 * 1024 * 1024); - EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); - } -} #endif From e08984f6dd11b01441480203a6358484a5251c00 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 10:56:33 -0800 Subject: [PATCH 132/321] Add more test --- cpp/tests/io/orc_chunked_reader_test.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 49571c23f0b..a3884745da4 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -915,7 +915,6 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) } } -#if 0 TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) { auto constexpr num_rows = 100'000; @@ -928,23 +927,22 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) cols.push_back(col.release()); auto const expected = std::make_unique(std::move(cols)); - auto const filepath = temp_env->get_temp_filepath("chunked_reader_null_count.parquet"); - auto const page_limit_rows = num_rows / 5; + auto const filepath = temp_env->get_temp_filepath("chunked_reader_null_count.orc"); + auto const stripe_limit_rows = num_rows / 5; auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) - .max_page_size_rows(page_limit_rows) // 20k rows per page + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, *expected) + .stripe_size_rows(stripe_limit_rows) .build(); - cudf::io::write_parquet(write_opts); + cudf::io::write_orc(write_opts); - auto const byte_limit = page_limit_rows * sizeof(int); + auto const byte_limit = stripe_limit_rows * sizeof(int); auto const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = + cudf::io::chunked_orc_reader(byte_limit, 0UL /*read_limit*/, stripe_limit_rows, read_opts); do { - // Every fourth row is null - EXPECT_EQ(reader.read_chunk().tbl->get_column(0).null_count(), page_limit_rows / 4); + // Every fourth row is null. 
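// With num_rows / 5 = 20'000 rows per stripe-sized chunk, one null in every
// four rows means 20'000 / 4 = 5'000 nulls per chunk returned below.
static_assert(100'000 / 5 / 4 == 5'000);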
+ EXPECT_EQ(reader.read_chunk().tbl->get_column(0).null_count(), stripe_limit_rows / 4UL); } while (reader.has_next()); } - -#endif From d555b5466ac525a9ebad3e488593465e0a7af67f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 13:03:26 -0800 Subject: [PATCH 133/321] Implement test limit function Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 56 ++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index a3884745da4..3f52668aee2 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -946,3 +946,59 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) EXPECT_EQ(reader.read_chunk().tbl->get_column(0).null_count(), stripe_limit_rows / 4UL); } while (reader.has_next()); } + +namespace { + +constexpr size_t input_limit_expected_file_count = 3; + +std::vector input_limit_get_test_names(std::string const& base_filename) +{ + return {base_filename + "_a.orc", base_filename + "_b.orc", base_filename + "_c.orc"}; +} + +void input_limit_test_write_one(std::string const& filepath, + cudf::table_view const& input, + cudf::io::compression_type compression) +{ + auto const out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input) + .compression(compression) + .build(); + cudf::io::write_orc(out_opts); +} + +void input_limit_test_write(std::vector const& test_filenames, + cudf::table_view const& input) +{ + CUDF_EXPECTS(test_filenames.size() == input_limit_expected_file_count, + "Unexpected count of test filenames."); + + // No compression + input_limit_test_write_one(test_filenames[0], input, cudf::io::compression_type::NONE); + + // Compression with a codec that uses a lot of scratch space at decode time (2.5x the total + // decompressed buffer size). + input_limit_test_write_one(test_filenames[1], input, cudf::io::compression_type::ZSTD); + + // Compression with a codec that uses no scratch space at decode time. 
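// (Snappy decompresses block-by-block straight into the destination buffer,
// so, unlike the ZSTD case above, the reader presumably only has to budget
// for the compressed input plus the decompressed output, with no extra
// decoder scratch.)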
+ input_limit_test_write_one(test_filenames[2], input, cudf::io::compression_type::SNAPPY); +} + +void input_limit_test_read(std::vector const& test_filenames, + cudf::table_view const& input, + size_t output_limit, + size_t input_limit, + int const* expected_chunk_counts) +{ + CUDF_EXPECTS(test_filenames.size() == input_limit_expected_file_count, + "Unexpected count of test filenames."); + + for (size_t idx = 0; idx < test_filenames.size(); idx++) { + auto const result = chunked_read(test_filenames[idx], output_limit, input_limit); + EXPECT_EQ(expected_chunk_counts[idx], result.second) + << "Unexpected number of chunks produced in chunk read."; + // TODO: equal + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.first, input); + } +} + +} // namespace From cfb8345d70a475708555b5a3c1808d81f93c2ba3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 14:30:01 -0800 Subject: [PATCH 134/321] Implement `load_limit_ratio` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 +++++++++++++----------- cpp/src/io/orc/reader_impl_chunking.hpp | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 19aa6eac48a..5aa499fd9e9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -487,6 +487,7 @@ void reader::impl::global_preprocess(uint64_t skip_rows, // Logically view streams as columns _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); + // TODO: handle large number of stripes. // Get the total number of stripes across all input files. auto const num_stripes = selected_stripes.size(); @@ -624,14 +625,13 @@ void reader::impl::global_preprocess(uint64_t skip_rows, printf("size: %ld, %zu\n", size.count, size.size_bytes); } - // DEBUG only - // TODO: use 0.3 constant - // _chunk_read_data.data_read_limit = total_stripe_sizes.back().size_bytes / 3; - // TODO: handle case for extremely large files. - - _chunk_read_data.load_stripe_chunks = - find_splits(total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); + auto const load_limit = [&] { + auto const tmp = static_cast(_chunk_read_data.data_read_limit * + chunk_read_data::load_limit_ratio); + return tmp > 0UL ? tmp : 1UL; + }(); + _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, load_limit); #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.load_stripe_chunks; @@ -844,11 +844,13 @@ void reader::impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); - // DEBUG only - // _chunk_read_data.data_read_limit = stripe_decomp_sizes.back().size_bytes / 3; - + auto const decode_limit = [&] { + auto const tmp = static_cast(_chunk_read_data.data_read_limit * + (1.0 - chunk_read_data::load_limit_ratio)); + return tmp > 0UL ? 
tmp : 1UL; + }(); _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, _chunk_read_data.data_read_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index cc37ac585a3..a721226b78b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -211,7 +211,8 @@ struct chunk_read_data { std::size_t output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit std::size_t data_read_limit; // approximate maximum size (in bytes) used for store // intermediate data, or 0 for no limit - size_type output_row_granularity; // TODO + size_type output_row_granularity; // TODO + static double constexpr load_limit_ratio{0.3}; // TODO // Chunks of stripes that can be load into memory such that their data size is within a size // limit. From e0721246c987efef211839c9c82924ab825acfee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 14:50:17 -0800 Subject: [PATCH 135/321] Add new test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 60 +++++++++++++++++++----- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 3f52668aee2..3174496f313 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -949,7 +949,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) namespace { -constexpr size_t input_limit_expected_file_count = 3; +std::size_t constexpr input_limit_expected_file_count = 3; std::vector input_limit_get_test_names(std::string const& base_filename) { @@ -962,43 +962,77 @@ void input_limit_test_write_one(std::string const& filepath, { auto const out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input) .compression(compression) + .stripe_size_rows(10'000) // intentionally write small stripes .build(); cudf::io::write_orc(out_opts); } -void input_limit_test_write(std::vector const& test_filenames, +void input_limit_test_write(std::vector const& test_files, cudf::table_view const& input) { - CUDF_EXPECTS(test_filenames.size() == input_limit_expected_file_count, + CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); // No compression - input_limit_test_write_one(test_filenames[0], input, cudf::io::compression_type::NONE); + input_limit_test_write_one(test_files[0], input, cudf::io::compression_type::NONE); // Compression with a codec that uses a lot of scratch space at decode time (2.5x the total // decompressed buffer size). - input_limit_test_write_one(test_filenames[1], input, cudf::io::compression_type::ZSTD); + input_limit_test_write_one(test_files[1], input, cudf::io::compression_type::ZSTD); // Compression with a codec that uses no scratch space at decode time. 
- input_limit_test_write_one(test_filenames[2], input, cudf::io::compression_type::SNAPPY); + input_limit_test_write_one(test_files[2], input, cudf::io::compression_type::SNAPPY); } -void input_limit_test_read(std::vector const& test_filenames, +void input_limit_test_read(int test_location, + std::vector const& test_files, cudf::table_view const& input, size_t output_limit, size_t input_limit, int const* expected_chunk_counts) { - CUDF_EXPECTS(test_filenames.size() == input_limit_expected_file_count, + CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); - for (size_t idx = 0; idx < test_filenames.size(); idx++) { - auto const result = chunked_read(test_filenames[idx], output_limit, input_limit); - EXPECT_EQ(expected_chunk_counts[idx], result.second) - << "Unexpected number of chunks produced in chunk read."; + for (size_t idx = 0; idx < test_files.size(); idx++) { + SCOPED_TRACE("Original line of failure: " + std::to_string(test_location) + + ", file idx: " + std::to_string(idx)); + auto const [result, num_chunks] = chunked_read(test_files[idx], output_limit, input_limit); + EXPECT_EQ(expected_chunk_counts[idx], num_chunks); // TODO: equal - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.first, input); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); } } } // namespace + +struct OrcChunkedReaderInputLimitTest : public cudf::test::BaseFixture {}; + +TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) +{ + auto constexpr num_rows = 1'000'000; + auto const iter1 = thrust::make_constant_iterator(15); + auto const col1 = cudf::test::fixed_width_column_wrapper(iter1, iter1 + num_rows); + auto const input = cudf::table_view{{col1}}; + + auto const filename = std::string{"single_col_fixed_width"}; + auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); + input_limit_test_write(test_files, input); + + // Some small limit. + { + int constexpr expected[] = {100, 100, 100}; + input_limit_test_read(__LINE__, test_files, input, 0UL, 1UL, expected); + } + + if (0) { + int constexpr expected[] = {15, 20, 9}; + input_limit_test_read(__LINE__, test_files, input, 0UL, 2 * 1024 * 1024UL, expected); + } + + // Limit of 1 byte. + if (0) { + int constexpr expected[] = {1, 50, 50}; + input_limit_test_read(__LINE__, test_files, input, 0UL, 1UL, expected); + } +} From 37aaeebfcc8078ca2229a6c88593a24e846200c7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 15:54:49 -0800 Subject: [PATCH 136/321] Add strong type for limits Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 226 +++++++++++++---------- 1 file changed, 125 insertions(+), 101 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 3174496f313..8471d2e362f 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -46,6 +46,10 @@ #include namespace { +enum class output_limit : std::size_t {}; +enum class input_limit : std::size_t {}; +enum class output_row_granularity : cudf::size_type {}; + // Global environment for temporary files auto const temp_env = reinterpret_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); @@ -101,14 +105,16 @@ auto write_file(std::vector>& input_columns, // This means if the input file has more than 10k rows then the output chunk will never // have less than 10k rows. 
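// A minimal usage sketch of the three knobs this helper forwards, assuming
// the reader interface exercised in the patches above (per patch 134, the
// input limit is further split by load_limit_ratio{0.3} into a stripe
// loading budget and a decompress/decode budget); the numeric limits here
// are illustrative only:
//
//   auto const opts =
//     cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build();
//   auto reader = cudf::io::chunked_orc_reader(240'000UL,    // output chunk size limit (bytes)
//                                              1'000'000UL,  // intermediate memory limit (bytes)
//                                              10'000,       // output row granularity (rows)
//                                              opts);
//   while (reader.has_next()) { auto const chunk = reader.read_chunk(); }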
auto chunked_read(std::string const& filepath, - std::size_t output_limit, - std::size_t input_limit = 0, - cudf::size_type output_row_granularity = 10'000) + output_limit output_limit_bytes, + input_limit input_limit_bytes = input_limit{0}, + output_row_granularity output_granularity = output_row_granularity{10'000}) { auto const read_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = - cudf::io::chunked_orc_reader(output_limit, input_limit, output_row_granularity, read_opts); + auto reader = cudf::io::chunked_orc_reader(static_cast(output_limit_bytes), + static_cast(input_limit_bytes), + static_cast(output_granularity), + read_opts); auto num_chunks = 0; auto out_tables = std::vector>{}; @@ -133,10 +139,10 @@ auto chunked_read(std::string const& filepath, } auto chunked_read(std::string const& filepath, - std::size_t output_limit, - cudf::size_type output_row_granularity) + output_limit output_limit_bytes, + output_row_granularity output_granularity) { - return chunked_read(filepath, output_limit, 0UL, output_row_granularity); + return chunked_read(filepath, output_limit_bytes, input_limit{0UL}, output_granularity); } } // namespace @@ -150,7 +156,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData) input_columns.emplace_back(int64s_col{}.release()); auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty", false); - auto const [result, num_chunks] = chunked_read(filepath, 1'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{1'000}); EXPECT_EQ(num_chunks, 1); EXPECT_EQ(result->num_rows(), 0); EXPECT_EQ(result->num_columns(), 2); @@ -176,26 +182,26 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) { auto const [expected, filepath] = generate_input(false, 1'000); - auto const [result, num_chunks] = chunked_read(filepath, 245'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{245'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { auto const [expected, filepath] = generate_input(false, cudf::io::default_stripe_size_rows); - auto const [result, num_chunks] = chunked_read(filepath, 245'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{245'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { auto const [expected, filepath] = generate_input(true, 1'000); - auto const [result, num_chunks] = chunked_read(filepath, 245'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{245'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { auto const [expected, filepath] = generate_input(true, cudf::io::default_stripe_size_rows); - auto const [result, num_chunks] = chunked_read(filepath, 245'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{245'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -216,14 +222,14 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath, 0UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte. 
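// A quick sanity check on the granularity arithmetic driving the small-limit
// expectations below (each row is 4 bytes, per the segment-size comments
// here): 40'000 rows make 4 default segments of 10'000 rows, 40 segments of
// 1'000 rows, or 2 segments of 30'000 rows (the last one short), so a 1-byte
// output limit yields 4, 40 and 2 chunks respectively.
static_assert(40'000 / 10'000 == 4 && 40'000 / 1'000 == 40 &&
              (40'000 + 30'000 - 1) / 30'000 == 2);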
{ - auto const [result, num_chunks] = chunked_read(filepath, 1UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{1UL}); // Number of chunks is 4 because of using default `output_row_granularity = 10k`. EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); @@ -231,27 +237,29 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with a very small limit: 1 byte, and small value of `output_row_granularity`. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, 1'000); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{1UL}, output_row_granularity{1'000}); EXPECT_EQ(num_chunks, 40); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte, and large value of `output_row_granularity`. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, 30'000); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{1UL}, output_row_granularity{30'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very large limit { - auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly less than one granularity segment of data // (output_row_granularity = 10k rows = 40'000 bytes). { - auto const [result, num_chunks] = chunked_read(filepath, 39'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{39'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -259,7 +267,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with a limit exactly the size one granularity segment of data // (output_row_granularity = 10k rows = 40'000 bytes). { - auto const [result, num_chunks] = chunked_read(filepath, 40'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{40'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -267,49 +275,49 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) // Test with a limit slightly more than one granularity segment of data // (output_row_granularity = 10k rows = 40'000 bytes). { - auto const [result, num_chunks] = chunked_read(filepath, 41'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{41'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly less than two granularity segments of data { - auto const [result, num_chunks] = chunked_read(filepath, 79'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{79'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of two granularity segments of data minus 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 79'999UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{79'999UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of two granularity segments of data. 
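// Two default segments are exactly 2 * 10'000 rows * 4 bytes = 80'000 bytes,
// so the 80'000-byte limit below packs two segments per chunk (2 chunks),
// while the 79'999-byte limit above forced one segment per chunk (4 chunks).
static_assert(2 * 10'000 * 4 == 80'000);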
{ - auto const [result, num_chunks] = chunked_read(filepath, 80'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{80'000UL}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly more the size two granularity segments of data. { - auto const [result, num_chunks] = chunked_read(filepath, 81'000); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{81'000}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of the input minus 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 159'999UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{159'999UL}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of the input. { - auto const [result, num_chunks] = chunked_read(filepath, 160'000UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{160'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -317,8 +325,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) { - auto constexpr num_rows = 60'000; - auto constexpr output_row_granularity = 20'000; + auto constexpr num_rows = 60'000; + auto constexpr output_granularity = output_row_granularity{20'000}; auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; @@ -357,37 +365,38 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath_no_null, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { auto const [result, num_chunks] = - chunked_read(filepath_with_nulls, 1UL, output_row_granularity); + chunked_read(filepath_with_nulls, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit. 
{ - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -396,24 +405,24 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) { auto const [result, num_chunks] = - chunked_read(filepath_no_null, 500'000UL, output_row_granularity); + chunked_read(filepath_no_null, output_limit{500'000UL}, output_granularity); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { auto const [result, num_chunks] = - chunked_read(filepath_with_nulls, 500'000UL, output_row_granularity); + chunked_read(filepath_with_nulls, output_limit{500'000UL}, output_granularity); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -421,8 +430,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithString) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) { - auto constexpr num_rows = 100'000; - auto constexpr output_row_granularity = 20'000; + auto constexpr num_rows = 100'000; + auto constexpr output_granularity = output_row_granularity{20'000}; auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; @@ -447,25 +456,26 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath_no_null, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { auto const [result, num_chunks] = - chunked_read(filepath_with_nulls, 1UL, output_row_granularity); + chunked_read(filepath_with_nulls, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -473,13 +483,13 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) // Test with a very large limit. 
{ auto const [result, num_chunks] = - chunked_read(filepath_no_null, 2L << 40, output_row_granularity); + chunked_read(filepath_no_null, output_limit{2L << 40}, output_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { auto const [result, num_chunks] = - chunked_read(filepath_with_nulls, 2L << 40, output_row_granularity); + chunked_read(filepath_with_nulls, output_limit{2L << 40}, output_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -488,13 +498,13 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) { auto const [result, num_chunks] = - chunked_read(filepath_no_null, 500'000UL, output_row_granularity); + chunked_read(filepath_no_null, output_limit{500'000UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { auto const [result, num_chunks] = - chunked_read(filepath_with_nulls, 500'000UL, output_row_granularity); + chunked_read(filepath_with_nulls, output_limit{500'000UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -502,8 +512,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructs) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) { - auto constexpr num_rows = 100'000; - auto constexpr output_row_granularity = 20'000; + auto constexpr num_rows = 100'000; + auto constexpr output_granularity = output_row_granularity{20'000}; auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; @@ -529,49 +539,54 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath, 0UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, output_row_granularity); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath, 2L << 40UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{2L << 40UL}, output_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size slightly less than 1 row segment (forcing it to be at least 1 segment per read). { - auto const [result, num_chunks] = chunked_read(filepath, 199'999UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{199'999UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size exactly 1 row segment. { - auto const [result, num_chunks] = chunked_read(filepath, 200'000UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{200'000UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size == size of 2 segments. Totally have 3 chunks. 
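// That is 2 * 200'000 = 400'000 bytes, which packs the five 20'000-row
// segments as 2 + 2 + 1 and hence yields the 3 chunks expected below.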
{ - auto const [result, num_chunks] = chunked_read(filepath, 400'000UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{400'000UL}, output_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size == size of 2 segment minus one byte: each chunk will be just one segment. { - auto const [result, num_chunks] = chunked_read(filepath, 399'999UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{399'999UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -579,8 +594,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) { - auto constexpr num_rows = 100'000; - auto constexpr output_row_granularity = 20'000; + auto constexpr num_rows = 100'000; + auto constexpr output_granularity = output_row_granularity{20'000}; auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; @@ -610,49 +625,54 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsHavingNulls) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath, 0UL); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath, 1UL, output_row_granularity); + auto const [result, num_chunks] = chunked_read(filepath, output_limit{1UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath, 2L << 40, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{2L << 40}, output_granularity); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size slightly less than 1 row segment (forcing it to be at least 1 segment per read). { - auto const [result, num_chunks] = chunked_read(filepath, 142'499UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{142'499UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size exactly 1 row segment. { - auto const [result, num_chunks] = chunked_read(filepath, 142'500UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{142'500UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size == size of 2 segments. Totally have 3 chunks. { - auto const [result, num_chunks] = chunked_read(filepath, 285'000UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{285'000UL}, output_granularity); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Chunk size == size of 2 segment minus one byte: each chunk will be just one segment. 
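// That is 2 * 142'500 - 1 = 284'999 bytes: one byte short of two segments,
// so every chunk holds a single segment and 5 chunks come back.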
{ - auto const [result, num_chunks] = chunked_read(filepath, 284'999UL, output_row_granularity); + auto const [result, num_chunks] = + chunked_read(filepath, output_limit{284'999UL}, output_granularity); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -698,36 +718,36 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1UL}); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1UL}); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -735,49 +755,49 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithStructsOfLists) // Other tests: { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1'500'000UL}); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{2'000'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{5'000'000UL}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = 
chunked_read(filepath_with_nulls, 1'500'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1'500'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{2'000'000UL}); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{5'000'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -830,36 +850,36 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) // Test with zero limit: everything will be read in one chunk. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{0UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1UL}); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1UL}); EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit. { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{2L << 40}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -867,49 +887,49 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsOfStructs) // Other tests. 
{ - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 7); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{1'500'000UL}); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{2'000'000UL}); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_no_null, output_limit{5'000'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1'000'000UL}); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{1'500'000UL}); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{2'000'000UL}); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000UL); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, output_limit{5'000'000UL}); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } @@ -987,8 +1007,8 @@ void input_limit_test_write(std::vector const& test_files, void input_limit_test_read(int test_location, std::vector const& test_files, cudf::table_view const& input, - size_t output_limit, - size_t input_limit, + output_limit output_limit_bytes, + input_limit input_limit_bytes, int const* expected_chunk_counts) { CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, @@ -997,7 +1017,8 @@ void input_limit_test_read(int test_location, for (size_t idx = 0; idx < test_files.size(); idx++) { SCOPED_TRACE("Original line of failure: " + std::to_string(test_location) + ", file idx: " + std::to_string(idx)); - auto const [result, num_chunks] = chunked_read(test_files[idx], output_limit, input_limit); + auto const [result, num_chunks] = + chunked_read(test_files[idx], output_limit_bytes, input_limit_bytes); EXPECT_EQ(expected_chunk_counts[idx], num_chunks); // TODO: equal CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); @@ -1022,17 +1043,20 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) // Some small limit. 
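// (input_limit_test_write() above wrote the 1'000'000 input rows as 10'000-row
// stripes, i.e. 100 stripes; a 1-byte input limit degenerates to one stripe per
// loaded chunk, hence 100 chunks for each of the three compression codecs.)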
{ int constexpr expected[] = {100, 100, 100}; - input_limit_test_read(__LINE__, test_files, input, 0UL, 1UL, expected); + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } if (0) { int constexpr expected[] = {15, 20, 9}; - input_limit_test_read(__LINE__, test_files, input, 0UL, 2 * 1024 * 1024UL, expected); + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } // Limit of 1 byte. if (0) { int constexpr expected[] = {1, 50, 50}; - input_limit_test_read(__LINE__, test_files, input, 0UL, 1UL, expected); + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } } From 4531ab3c0aeda58338d1d982661c08c6dce62f49 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 15:57:46 -0800 Subject: [PATCH 137/321] Fix test check Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 8471d2e362f..2405c0eea90 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -130,6 +130,10 @@ auto chunked_read(std::string const& filepath, out_tables.emplace_back(std::move(chunk.tbl)); } while (reader.has_next()); + if (num_chunks > 1) { + CUDF_EXPECTS(out_tables.front()->num_rows() != 0, "Number of rows in the new chunk is zero."); + } + auto out_tviews = std::vector{}; for (auto const& tbl : out_tables) { out_tviews.emplace_back(tbl->view()); From 9a80faf4142c7c4c89254f873d0d4bd6ebda4653 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 16:31:30 -0800 Subject: [PATCH 138/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 124 ------------------------- 1 file changed, 124 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5aa499fd9e9..9b8dfce2f67 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -66,130 +66,6 @@ // // // -namespace cudf::experimental { - -enum class decompose_lists_column : bool { YES, NO }; - -auto decompose_structs(table_view table, - decompose_lists_column decompose_lists, - host_span column_order = {}, - host_span null_precedence = {}) -{ - auto linked_columns = detail::table_to_linked_columns(table); - - std::vector verticalized_columns; - std::vector new_column_order; - std::vector new_null_precedence; - std::vector verticalized_col_depths; - for (size_t col_idx = 0; col_idx < linked_columns.size(); ++col_idx) { - detail::linked_column_view const* col = linked_columns[col_idx].get(); - if (is_nested(col->type())) { - // convert and insert - std::vector> flattened; - std::function*, int)> - recursive_child = [&](detail::linked_column_view const* c, - std::vector* branch, - int depth) { - branch->push_back(c); - if (decompose_lists == decompose_lists_column::YES && c->type().id() == type_id::LIST) { - recursive_child( - c->children[lists_column_view::child_column_index].get(), branch, depth + 1); - } else if (c->type().id() == type_id::STRUCT) { - for (size_t child_idx = 0; child_idx < c->children.size(); ++child_idx) { - // When child_idx == 0, we also cut off the current branch if its first child is a - // lists column. 
- // In such cases, the last column of the current branch will be `Struct` and - // it will be modified to empty struct type `Struct<>` later on. - if (child_idx > 0 || c->children[0]->type().id() == type_id::LIST) { - verticalized_col_depths.push_back(depth + 1); - branch = &flattened.emplace_back(); - } - recursive_child(c->children[child_idx].get(), branch, depth + 1); - } - } - }; - auto& branch = flattened.emplace_back(); - verticalized_col_depths.push_back(0); - recursive_child(col, &branch, 0); - - for (auto const& branch : flattened) { - column_view temp_col = *branch.back(); - - // Change `Struct` into empty struct type `Struct<>`. - if (temp_col.type().id() == type_id::STRUCT && - (temp_col.num_children() > 0 && temp_col.child(0).type().id() == type_id::LIST)) { - temp_col = column_view(temp_col.type(), - temp_col.size(), - temp_col.head(), - temp_col.null_mask(), - temp_col.null_count(), - temp_col.offset(), - {}); - } - - for (auto it = branch.crbegin() + 1; it < branch.crend(); ++it) { - auto const& prev_col = *(*it); - auto children = - (prev_col.type().id() == type_id::LIST) - ? std::vector{*prev_col - .children[lists_column_view::offsets_column_index], - temp_col} - : std::vector{temp_col}; - temp_col = column_view(prev_col.type(), - prev_col.size(), - nullptr, - prev_col.null_mask(), - prev_col.null_count(), - prev_col.offset(), - std::move(children)); - } - // Traverse upward and include any list columns in the ancestors - for (detail::linked_column_view* parent = branch.front()->parent; parent; - parent = parent->parent) { - if (parent->type().id() == type_id::LIST) { - // Include this parent - temp_col = column_view( - parent->type(), - parent->size(), - nullptr, // list has no data of its own - nullptr, // If we're going through this then nullmask is already in another branch - 0, - parent->offset(), - {*parent->children[lists_column_view::offsets_column_index], temp_col}); - } else if (parent->type().id() == type_id::STRUCT) { - // Replace offset with parent's offset - temp_col = column_view(temp_col.type(), - parent->size(), - temp_col.head(), - temp_col.null_mask(), - temp_col.null_count(), - parent->offset(), - {temp_col.child_begin(), temp_col.child_end()}); - } - } - verticalized_columns.push_back(temp_col); - } - if (not column_order.empty()) { - new_column_order.insert(new_column_order.end(), flattened.size(), column_order[col_idx]); - } - if (not null_precedence.empty()) { - new_null_precedence.insert( - new_null_precedence.end(), flattened.size(), null_precedence[col_idx]); - } - } else { - verticalized_columns.push_back(*col); - verticalized_col_depths.push_back(0); - if (not column_order.empty()) { new_column_order.push_back(column_order[col_idx]); } - if (not null_precedence.empty()) { new_null_precedence.push_back(null_precedence[col_idx]); } - } - } - return std::make_tuple(table_view(verticalized_columns), - std::move(new_column_order), - std::move(new_null_precedence), - std::move(verticalized_col_depths)); -} -} // namespace cudf::experimental namespace cudf::io::orc::detail { From 6279ad65e681d0a51258a6ee12823254d210f4c6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 21:48:34 -0800 Subject: [PATCH 139/321] Fix bug in stream data access Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 44 +++++++++++++++++++++++-- cpp/src/io/orc/reader_impl_chunking.cu | 21 ++++++++---- cpp/src/io/orc/reader_impl_chunking.hpp | 4 +++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu 
b/cpp/src/io/orc/reader_impl.cu index a7c9d1ab635..dc2f9fbdebe 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -512,6 +512,19 @@ void decode_stream_data(std::size_t num_dicts, auto const tz_table_dptr = table_device_view::create(tz_table, stream); rmm::device_scalar error_count(0, stream); // Update the null map for child columns + + // printf( + // "num col: %d, num stripe: %d, skip row: %d, row_groups size: %d, row index stride: %d, " + // "level: " + // "%d\n", + // (int)num_columns, + // (int)num_stripes, + // (int)skip_rows, + // (int)row_groups.size().first, + // (int)row_index_stride, + // (int)level + // ); + gpu::DecodeOrcColumnData(chunks.base_device_ptr(), global_dict.data(), row_groups, @@ -917,9 +930,14 @@ void reader::impl::decompress_and_decode() #endif + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { printf("processing level = %d\n", (int)level); + auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); + auto& columns_level = _selected_columns.levels[level]; // TODO: do it in global step @@ -1003,6 +1021,10 @@ void reader::impl::decompress_and_decode() auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; + // printf("stripeinfo->indexLength: %d, data: %d\n", + // (int)stripe_info->indexLength, + // (int)stripe_info->dataLength); + auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, level, stripe_info, @@ -1088,12 +1110,20 @@ void reader::impl::decompress_and_decode() ? sizeof(size_type) : cudf::size_of(column_types[col_idx]); chunk.num_rowgroups = stripe_num_rowgroups; + // printf("stripe_num_rowgroups: %d\n", (int)stripe_num_rowgroups); + if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _config.timestamp_type.id(); } if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stripe_start].dst_pos; + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stream_begin].dst_pos; + // printf("chunk.streams[%d] of chunk.strm_id[%d], stripe %d | %d, collect from %d\n", + // (int)k, + // (int)chunk.strm_id[k], + // (int)stripe_idx, + // (int)stripe_start, + // (int)(chunk.strm_id[k] + stream_begin)); } } } @@ -1137,8 +1167,9 @@ void reader::impl::decompress_and_decode() // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + // printf("decompress----------------------\n"); // printf("line %d\n", __LINE__); - // fflush(stdout); + fflush(stdout); auto decomp_data = decompress_stripe_data(stripe_chunk, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, @@ -1157,7 +1188,11 @@ void reader::impl::decompress_and_decode() // fflush(stdout); } else { + // printf("no decompression----------------------\n"); + if (row_groups.size().first) { + // printf("line %d\n", __LINE__); + // fflush(stdout); chunks.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); row_groups.host_to_device_async(_stream); @@ -1187,7 +1222,7 @@ void reader::impl::decompress_and_decode() auto is_list_type = (column_types[i].id() == type_id::LIST); auto n_rows = (level == 0) ? 
rows_to_read : col_meta.num_child_rows[i]; - // printf(" create child col, num rows: %d\n", (int)n_rows); + // printf(" create col, num rows: %d\n", (int)n_rows); // For list column, offset column will be always size + 1 if (is_list_type) n_rows++; @@ -1258,6 +1293,9 @@ void reader::impl::decompress_and_decode() }); _chunk_read_data.decoded_table = std::make_unique
(std::move(out_columns)); + // printf("col: \n"); + // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view()); + // DEBUG only // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9b8dfce2f67..e176e32f561 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -142,7 +142,14 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[stripe_index][col]; + auto& chunk = (*chunks.value())[stripe_index][col]; + // printf("use stream id: %d, stripe: %d, level: %d, col idx: %d, kind: %d\n", + // (int)(*stream_idx), + // (int)stripe_index, + // (int)level, + // (int)column_id, + // (int)stream.kind); + chunk.strm_id[index_type] = *stream_idx; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams @@ -155,6 +162,7 @@ std::size_t gather_stream_info_and_column_desc( } } } + (*stream_idx)++; } else { // not chunks.has_value() // printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", @@ -302,6 +310,8 @@ void verify_splits(host_span splits, } #endif +} // namespace + /** * @brief Find range of the data span by a given chunk of chunks. * @@ -327,8 +337,6 @@ std::pair get_range(std::vector const& input_chunks, return {begin, end}; } -} // namespace - void reader::impl::global_preprocess(uint64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) @@ -556,6 +564,7 @@ void reader::impl::load_data() auto& stripe_data = lvl_stripe_data[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + // TODO: only do this if it was not allocated before. stripe_data[stripe_idx] = rmm::device_buffer( cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); } @@ -666,7 +675,7 @@ void reader::impl::load_data() stream_compinfo->max_uncompressed_size; #ifdef PRINT_DEBUG printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.id.stripe_idx, + (int)stream_id.stripe_idx, (int)stream_id.level, (int)stream_id.orc_col_idx, (int)stream_id.kind, @@ -681,8 +690,8 @@ void reader::impl::load_data() stream_compinfo_map.clear(); } else { - // printf("no compression \n"); - // fflush(stdout); + printf("no compression \n"); + fflush(stdout); // Set decompression size equal to the input size. for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index a721226b78b..18fcbf25bdb 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -270,6 +270,10 @@ std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit); +// TODO +std::pair get_range(std::vector const& input_chunks, + chunk const& selected_chunks); + /** * @brief Function that populates descriptors for either individual streams or chunks of column * data, but not both. 
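The `get_range` declaration above, together with the per-level stream bookkeeping it is used with, maps a consecutive run of stripes to a half-open range of stream indices: each element of the input vector records where one stripe's streams start and how many streams it owns. A minimal sketch of the intended behavior, with hypothetical stream counts and assuming `chunk` aggregate-initializes from `{start_idx, count}` as declared in reader_impl_chunking.hpp:

#include <cassert>
#include <vector>

// Three stripes owning 4, 2 and 5 streams each, stored as
// {index_of_first_stream, number_of_streams}.
std::vector<chunk> stripe_stream_chunks{{0, 4}, {4, 2}, {6, 5}};

// Select stripes [1, 3), i.e. the second and the third stripe.
chunk const stripe_chunk{1, 2};

// begin = stripe_stream_chunks[1].start_idx             = 4
// end   = stripe_stream_chunks[2].start_idx + its count = 6 + 5 = 11
auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk);
assert(stream_begin == 4 && stream_end == 11);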
From 3a89549ee161d1057e288d6bee366437d37e1774 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 1 Mar 2024 21:56:59 -0800 Subject: [PATCH 140/321] Add temp docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 28 ++++++++++++++++++++++++ cpp/include/cudf/io/orc.hpp | 35 ++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index ac024caf1f3..c6176021a79 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -120,6 +120,34 @@ class chunked_reader : private reader { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + /** + * @brief Constructor from size limits and an array of data sources with reader options. + * + * The typical usage should be similar to this: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); + * + * ``` + * + * If `output_size_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the + * whole file and return a table containing all rows. + * + * TODO: data read limit + * TODO: granularity + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param data_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit + * @param output_row_granularity TODO + * @param sources Input `datasource` objects to read the dataset from + * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, size_type output_row_granularity, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index cfab642f25d..19252e77b91 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -423,7 +423,21 @@ class chunked_orc_reader { */ chunked_orc_reader() = default; - // TODO + /** + * @brief Constructor for chunked reader. + * + * This constructor requires the same `orc_reader_option` parameter as in + * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the + * output table for each reading. + * + * TODO: data read limit + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param options The options used to read Parquet file + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ chunked_orc_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -452,7 +466,24 @@ class chunked_orc_reader { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - // TODO + /** + * @brief Constructor for chunked reader. + * + * This constructor requires the same `orc_reader_option` parameter as in + * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the + * output table for each reading. 
+ * + * TODO: data read limit + * + * @param output_size_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param data_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit + * @param output_row_granularity TODO + * @param options The options used to read Parquet file + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ chunked_orc_reader(std::size_t output_size_limit, std::size_t data_read_limit, size_type output_row_granularity, From d1cc44c8aa00319d7f8d728a1825fb658de6a1d2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 08:24:57 -0800 Subject: [PATCH 141/321] Add new tests Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/io/orc_chunked_reader_test.cpp | 52 ++++++++++++++++++------ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c8f490df02b..511705855f5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -287,7 +287,7 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - ORC_TEST io/orc_chunked_reader_test.cpp io/orc_test.cpp + ORC_TEST io/orc_chunked_reader_test.cpp GPUS 1 PERCENT 30 ) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cpp index 2405c0eea90..9298b95616e 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cpp @@ -997,14 +997,14 @@ void input_limit_test_write(std::vector const& test_files, CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); - // No compression + // ZSTD yields a very small decompression size, can be much smaller than SNAPPY. + // However, ORC reader typically over-estimates the decompression size of data + // compressed by ZSTD to be very large, can be much larger than that of SNAPPY. + // That is because ZSTD may use a lot of scratch space at decode time + // (2.5x the total decompressed buffer size). + // As such, we may see smaller output chunks for the input data compressed by ZSTD. input_limit_test_write_one(test_files[0], input, cudf::io::compression_type::NONE); - - // Compression with a codec that uses a lot of scratch space at decode time (2.5x the total - // decompressed buffer size). input_limit_test_write_one(test_files[1], input, cudf::io::compression_type::ZSTD); - - // Compression with a codec that uses no scratch space at decode time. input_limit_test_write_one(test_files[2], input, cudf::io::compression_type::SNAPPY); } @@ -1018,7 +1018,7 @@ void input_limit_test_read(int test_location, CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); - for (size_t idx = 0; idx < test_files.size(); idx++) { + for (size_t idx = 0; idx < test_files.size(); ++idx) { SCOPED_TRACE("Original line of failure: " + std::to_string(test_location) + ", file idx: " + std::to_string(idx)); auto const [result, num_chunks] = @@ -1044,23 +1044,51 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); input_limit_test_write(test_files, input); - // Some small limit. 
{ int constexpr expected[] = {100, 100, 100}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } - if (0) { + { int constexpr expected[] = {15, 20, 9}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } +} - // Limit of 1 byte. - if (0) { - int constexpr expected[] = {1, 50, 50}; +TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) +{ + auto constexpr num_rows = 1'000'000; + + auto const iter1 = thrust::make_counting_iterator(0); + auto const col1 = cudf::test::fixed_width_column_wrapper(iter1, iter1 + num_rows); + + auto const iter2 = thrust::make_counting_iterator(0); + auto const col2 = cudf::test::fixed_width_column_wrapper(iter2, iter2 + num_rows); + + auto const strings = std::vector{"abc", "de", "fghi"}; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { + if (i < 250000) { return strings[0]; } + if (i < 750000) { return strings[1]; } + return strings[2]; + }); + auto const col3 = strings_col(str_iter, str_iter + num_rows); + + auto const filename = std::string{"single_col_fixed_width"}; + auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); + auto const input = cudf::table_view{{col1, col2, col3}}; + input_limit_test_write(test_files, input); + + { + int constexpr expected[] = {100, 100, 100}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } + + { + int constexpr expected[] = {15, 100, 21}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); + } } From ac97dc2ef17c9fb4a912549f3384123b058c486f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 12:54:49 -0800 Subject: [PATCH 142/321] Add test Signed-off-by: Nghia Truong --- ...er_test.cpp => orc_chunked_reader_test.cu} | 70 ++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) rename cpp/tests/io/{orc_chunked_reader_test.cpp => orc_chunked_reader_test.cu} (95%) diff --git a/cpp/tests/io/orc_chunked_reader_test.cpp b/cpp/tests/io/orc_chunked_reader_test.cu similarity index 95% rename from cpp/tests/io/orc_chunked_reader_test.cpp rename to cpp/tests/io/orc_chunked_reader_test.cu index 9298b95616e..d5790b27327 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cpp +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -39,6 +39,7 @@ #include #include +#include #include @@ -1038,10 +1039,10 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) auto constexpr num_rows = 1'000'000; auto const iter1 = thrust::make_constant_iterator(15); auto const col1 = cudf::test::fixed_width_column_wrapper(iter1, iter1 + num_rows); - auto const input = cudf::table_view{{col1}}; auto const filename = std::string{"single_col_fixed_width"}; auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); + auto const input = cudf::table_view{{col1}}; input_limit_test_write(test_files, input); { @@ -1075,7 +1076,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) }); auto const col3 = strings_col(str_iter, str_iter + num_rows); - auto const filename = std::string{"single_col_fixed_width"}; + auto const filename = std::string{"mixed_columns"}; auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); auto const input = cudf::table_view{{col1, col2, col3}}; input_limit_test_write(test_files, input); @@ -1092,3 +1093,68 @@ 
TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } } + +namespace { + +struct offset_gen { + int const group_size; + __device__ int operator()(int i) const { return i * group_size; } +}; + +template +struct value_gen { + __device__ T operator()(int i) const { return i % 1024; } +}; + +#if 0 +struct char_values { + __device__ int8_t operator()(int i) const + { + int const index = (i / 2) % 3; + // Generate repeating 3-runs of 2 values each: "aabbccaabbcc...". + return index == 0 ? 'a' : (index == 1 ? 'b' : 'c'); + } +}; +#endif + +} // namespace + +TEST_F(OrcChunkedReaderInputLimitTest, ListType) +{ + int constexpr num_rows = 50'000'000; + int constexpr list_size = 4; + + auto const stream = cudf::get_default_stream(); + auto const iter = thrust::make_counting_iterator(0); + + auto offset_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::INT32}, num_rows + 1, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_rows + 1, + offset_col->mutable_view().begin(), + offset_gen{list_size}); + + int constexpr num_ints = num_rows * list_size; + auto value_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::INT32}, num_ints, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_ints, + value_col->mutable_view().begin(), + value_gen{}); + + auto const lists_col = + cudf::make_lists_column(num_rows, std::move(offset_col), std::move(value_col), 0, {}, stream); + + auto const filename = std::string{"list_type"}; + auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); + auto const input = cudf::table_view{{*lists_col}}; + input_limit_test_write(test_files, input); + + { + int constexpr expected[] = {5000, 5000, 5000}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); + } +} From 9b2bbaa3cf37859f5f425302db6c88ec1ed6a94f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 12:57:07 -0800 Subject: [PATCH 143/321] Allow to control number of rows per stripe Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index d5790b27327..ba1f5c891bb 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -983,17 +983,20 @@ std::vector input_limit_get_test_names(std::string const& base_file void input_limit_test_write_one(std::string const& filepath, cudf::table_view const& input, + cudf::size_type stripe_size_rows, cudf::io::compression_type compression) { auto const out_opts = cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input) .compression(compression) - .stripe_size_rows(10'000) // intentionally write small stripes + .stripe_size_rows(stripe_size_rows) .build(); cudf::io::write_orc(out_opts); } -void input_limit_test_write(std::vector const& test_files, - cudf::table_view const& input) +void input_limit_test_write( + std::vector const& test_files, + cudf::table_view const& input, + cudf::size_type stripe_size_rows = 10'000 /*write small stripes by default*/) { CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); @@ -1004,9 +1007,12 @@ void input_limit_test_write(std::vector const& 
test_files, // That is because ZSTD may use a lot of scratch space at decode time // (2.5x the total decompressed buffer size). // As such, we may see smaller output chunks for the input data compressed by ZSTD. - input_limit_test_write_one(test_files[0], input, cudf::io::compression_type::NONE); - input_limit_test_write_one(test_files[1], input, cudf::io::compression_type::ZSTD); - input_limit_test_write_one(test_files[2], input, cudf::io::compression_type::SNAPPY); + input_limit_test_write_one( + test_files[0], input, stripe_size_rows, cudf::io::compression_type::NONE); + input_limit_test_write_one( + test_files[1], input, stripe_size_rows, cudf::io::compression_type::ZSTD); + input_limit_test_write_one( + test_files[2], input, stripe_size_rows, cudf::io::compression_type::SNAPPY); } void input_limit_test_read(int test_location, From a959db2f49bed04f8da28c814e189c98e81dd6fa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 13:03:45 -0800 Subject: [PATCH 144/321] Write a bit larger stripes to test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 32 +++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index ba1f5c891bb..21210426b24 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -996,7 +996,7 @@ void input_limit_test_write_one(std::string const& filepath, void input_limit_test_write( std::vector const& test_files, cudf::table_view const& input, - cudf::size_type stripe_size_rows = 10'000 /*write small stripes by default*/) + cudf::size_type stripe_size_rows = 20'000 /*write relatively small stripes by default*/) { CUDF_EXPECTS(test_files.size() == input_limit_expected_file_count, "Unexpected count of test filenames."); @@ -1052,13 +1052,13 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) input_limit_test_write(test_files, input); { - int constexpr expected[] = {100, 100, 100}; + int constexpr expected[] = {50, 50, 50}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } { - int constexpr expected[] = {15, 20, 9}; + int constexpr expected[] = {17, 10, 9}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } @@ -1088,13 +1088,13 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) input_limit_test_write(test_files, input); { - int constexpr expected[] = {100, 100, 100}; + int constexpr expected[] = {50, 50, 50}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } { - int constexpr expected[] = {15, 100, 21}; + int constexpr expected[] = {17, 50, 14}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } @@ -1156,11 +1156,29 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) auto const filename = std::string{"list_type"}; auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); auto const input = cudf::table_view{{*lists_col}}; - input_limit_test_write(test_files, input); + input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); { - int constexpr expected[] = {5000, 5000, 5000}; + // Although we set `stripe_size_rows` to be very large, the writer only write + // 250k rows per stripe. Thus, we have 200 stripes in total. 
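+ // (50'000'000 rows at 250'000 rows per stripe is exactly 200 stripes, and a
+ // 1-byte input limit again degenerates to one stripe per chunk.)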
+ int constexpr expected[] = {200, 200, 200}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); } + + { + int constexpr expected[] = {2, 34, 2}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{5 * 1024 * 1024UL}, expected); + } + + { + int constexpr expected[] = {8, 34, 8}; + input_limit_test_read(__LINE__, + test_files, + input, + output_limit{128 * 1024 * 1024UL}, + input_limit{5 * 1024 * 1024UL}, + expected); + } } From 81b78ea5459a30e4ef7bf0b8e5255a7cb52e0229 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 13:45:38 -0800 Subject: [PATCH 145/321] Add the final test Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/io/orc_chunked_reader_test.cu | 95 ++++++++++++++++++++++--- 2 files changed, 88 insertions(+), 11 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 511705855f5..058b8555378 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -287,7 +287,9 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - ORC_TEST io/orc_chunked_reader_test.cpp + ORC_TEST + io/orc_chunked_reader_test.cu + io/orc_test.cpp GPUS 1 PERCENT 30 ) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 21210426b24..2c7e43060f6 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1112,7 +1112,6 @@ struct value_gen { __device__ T operator()(int i) const { return i % 1024; } }; -#if 0 struct char_values { __device__ int8_t operator()(int i) const { @@ -1121,7 +1120,6 @@ struct char_values { return index == 0 ? 'a' : (index == 1 ? 'b' : 'c'); } }; -#endif } // namespace @@ -1156,15 +1154,10 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) auto const filename = std::string{"list_type"}; auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); auto const input = cudf::table_view{{*lists_col}}; - input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); - { - // Although we set `stripe_size_rows` to be very large, the writer only write - // 250k rows per stripe. Thus, we have 200 stripes in total. - int constexpr expected[] = {200, 200, 200}; - input_limit_test_read( - __LINE__, test_files, input, output_limit{0UL}, input_limit{1UL}, expected); - } + // Although we set `stripe_size_rows` to be very large, the writer only write + // 250k rows per stripe. Thus, we have 200 stripes in total. 
+ input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); { int constexpr expected[] = {2, 34, 2}; @@ -1182,3 +1175,85 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) expected); } } + +TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) +{ + int constexpr num_rows = 50'000'000; + int constexpr list_size = 4; + int constexpr str_size = 3; + + auto const stream = cudf::get_default_stream(); + auto const iter = thrust::make_counting_iterator(0); + + // list + auto offset_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::INT32}, num_rows + 1, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_rows + 1, + offset_col->mutable_view().begin(), + offset_gen{list_size}); + + int constexpr num_ints = num_rows * list_size; + auto value_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::INT32}, num_ints, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_ints, + value_col->mutable_view().begin(), + value_gen{}); + + auto const lists_col = + cudf::make_lists_column(num_rows, std::move(offset_col), std::move(value_col), 0, {}, stream); + + // strings + int constexpr num_chars = num_rows * str_size; + auto str_offset_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::INT32}, num_rows + 1, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_rows + 1, + str_offset_col->mutable_view().begin(), + offset_gen{str_size}); + rmm::device_buffer str_chars(num_chars, stream); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_chars, + static_cast(str_chars.data()), + char_values{}); + auto const str_col = + cudf::make_strings_column(num_rows, std::move(str_offset_col), std::move(str_chars), 0, {}); + + // doubles + auto const double_col = cudf::make_fixed_width_column( + cudf::data_type{cudf::type_id::FLOAT64}, num_rows, cudf::mask_state::UNALLOCATED); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_rows, + double_col->mutable_view().begin(), + value_gen{}); + + auto const filename = std::string{"mixed_cols_having_list"}; + auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); + auto const input = cudf::table_view{{*lists_col, *str_col, *double_col}}; + + // Although we set `stripe_size_rows` to be very large, the writer only write + // 250k rows per stripe. Thus, we have 200 stripes in total. 
+ input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); + + { + int constexpr expected[] = {11, 7, 5}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, expected); + } + + { + int constexpr expected[] = {21, 13, 14}; + input_limit_test_read(__LINE__, + test_files, + input, + output_limit{128 * 1024 * 1024UL}, + input_limit{128 * 1024 * 1024UL}, + expected); + } +} From 5537033597aa0c0835b6da3475def7ff9726a464 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 14:12:56 -0800 Subject: [PATCH 146/321] Change debug info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index e176e32f561..867ef4b508f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -490,8 +490,11 @@ void reader::impl::global_preprocess(uint64_t skip_rows, } printf("total stripe sizes:\n"); + int count{0}; for (auto& size : total_stripe_sizes) { + ++count; printf("size: %ld, %zu\n", size.count, size.size_bytes); + if (count > 5) break; } // Compute the prefix sum of stripe data sizes. @@ -504,9 +507,12 @@ void reader::impl::global_preprocess(uint64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); + count = 0; printf("prefix sum total stripe sizes:\n"); for (auto& size : total_stripe_sizes) { + ++count; printf("size: %ld, %zu\n", size.count, size.size_bytes); + if (count > 5) break; } // TODO: handle case for extremely large files. @@ -740,8 +746,10 @@ void reader::impl::load_data() chunk.start_idx += stripe_chunk.start_idx; } + int count{0}; for (auto& size : stripe_decomp_sizes) { printf("decomp size: %ld, %zu\n", size.count, size.size_bytes); + if (count++ > 5) break; } #ifndef PRINT_DEBUG From 41b9f52c5457b9a95e28b5b4b305ecd112f652a9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 20:10:16 -0800 Subject: [PATCH 147/321] Implement peak memory usage Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 8 +++++++- cpp/src/io/orc/reader_impl.hpp | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index dc2f9fbdebe..9fef8782dee 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1396,6 +1396,11 @@ table_with_metadata reader::impl::make_output_chunk() }(); #endif + + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) + << " MB)" << std::endl; + return {std::move(out_table), _out_metadata}; } @@ -1472,7 +1477,8 @@ reader::impl::impl(std::size_t output_size_limit, _chunk_read_data{ output_size_limit, data_read_limit, - output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} + output_row_granularity > 0 ? 
output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY}, + mem_stats_logger(mr) { printf("construct reader , limit = %d, %d, gradunarity %d \n", diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 9ca003672a4..3f93541aed5 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -24,6 +24,7 @@ #include #include +#include // TODO: remove #include @@ -33,6 +34,26 @@ namespace cudf::io::orc::detail { +class memory_stats_logger { + public: + explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) + : existing_mr(mr), statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + { + rmm::mr::set_current_device_resource(&statistics_mr); + } + + ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } + + [[nodiscard]] size_t peak_memory_usage() const noexcept + { + return statistics_mr.get_bytes_counter().peak; + } + + private: + rmm::mr::device_memory_resource* existing_mr; + rmm::mr::statistics_resource_adaptor statistics_mr; +}; + struct reader_column_meta; /** @@ -187,6 +208,8 @@ class reader::impl { std::vector> _out_buffers; static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000; + + memory_stats_logger mem_stats_logger; }; } // namespace cudf::io::orc::detail From 65976990554f9121fb48bda29a3edc0a8dfa0841 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 20:36:54 -0800 Subject: [PATCH 148/321] Optimize memory usage Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 35 +++++++++++++++----------- cpp/src/io/orc/reader_impl_chunking.cu | 15 ++++++----- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 9fef8782dee..f7dbcc46282 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -78,6 +78,7 @@ namespace { * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( + chunk const& load_stripe_chunk, chunk const& stripe_chunk, stream_id_map const& compinfo_map, OrcDecompressor const& decompressor, @@ -127,7 +128,9 @@ rmm::device_buffer decompress_stripe_data( #endif compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, + static_cast( + stripe_data[info.id.stripe_idx - load_stripe_chunk.start_idx].data()) + + info.dst_pos, info.length)); // printf("line %d\n", __LINE__); @@ -1044,7 +1047,7 @@ void reader::impl::decompress_and_decode() CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - auto dst_base = static_cast(stripe_data[stripe_idx].data()); + auto dst_base = static_cast(stripe_data[stripe_idx - stripe_start].data()); // printf("line %d\n", __LINE__); // fflush(stdout); @@ -1169,18 +1172,22 @@ void reader::impl::decompress_and_decode() if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { // printf("decompress----------------------\n"); // printf("line %d\n", __LINE__); - fflush(stdout); - auto decomp_data = decompress_stripe_data(stripe_chunk, - _file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); + // fflush(stdout); + CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); + + auto decomp_data = decompress_stripe_data( + _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], + stripe_chunk, + 
_file_itm_data.compinfo_map, + *_metadata.per_file_metadata[0].decompressor, + stripe_data, + stream_info, + chunks, + row_groups, + num_stripes, + _metadata.get_row_index_stride(), + level == 0, + _stream); // stripe_data.clear(); stripe_data.push_back(std::move(decomp_data)); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 867ef4b508f..41ffe3288c0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -401,8 +401,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, col_meta.orc_col_map[level][col.id] = col_id++; } - auto& stripe_data = lvl_stripe_data[level]; - stripe_data.resize(num_stripes); + // auto& stripe_data = lvl_stripe_data[level]; + // stripe_data.resize(num_stripes); auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -567,11 +567,13 @@ void reader::impl::load_data() // Prepare the buffer to read raw data onto. // TODO: clear all old buffer. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stripe_data = lvl_stripe_data[level]; + auto& stripe_data = lvl_stripe_data[level]; + stripe_data.resize(stripe_chunk.count); + auto& stripe_sizes = lvl_stripe_sizes[level]; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { // TODO: only do this if it was not allocated before. - stripe_data[stripe_idx] = rmm::device_buffer( + stripe_data[stripe_idx - stripe_start] = rmm::device_buffer( cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); } } @@ -585,7 +587,7 @@ void reader::impl::load_data() for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read = read_info[read_idx]; auto& stripe_data = lvl_stripe_data[read.level]; - auto dst_base = static_cast(stripe_data[read.stripe_idx].data()); + auto dst_base = static_cast(stripe_data[read.stripe_idx - stripe_start].data()); if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( read.length)) { @@ -646,7 +648,8 @@ void reader::impl::load_data() for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.id.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[info.id.stripe_idx - stripe_start].data()) + + info.dst_pos, info.length)); stream_compinfo_map[stream_id_info{ info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] = &compinfo.back(); From 277758e073d1e7fde9e5527ba843099bf4d09d7c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 2 Mar 2024 20:58:03 -0800 Subject: [PATCH 149/321] Add debug info Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 2c7e43060f6..47bdb2427e8 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1028,6 +1028,8 @@ void input_limit_test_read(int test_location, for (size_t idx = 0; idx < test_files.size(); ++idx) { SCOPED_TRACE("Original line of failure: " + std::to_string(test_location) + ", file idx: " + std::to_string(idx)); + // TODO: remove + printf("file_idx %d\n", (int)idx); auto const [result, num_chunks] = chunked_read(test_files[idx], output_limit_bytes, input_limit_bytes); 
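// (chunked_read() drives read_chunk() until has_next() returns false,
// concatenates the resulting pieces, and reports how many chunks it took;
// that count is what is asserted against below.)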
EXPECT_EQ(expected_chunk_counts[idx], num_chunks); @@ -1256,4 +1258,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) input_limit{128 * 1024 * 1024UL}, expected); } + + // TODO: remove + { + int constexpr expected[] = {1, 1, 1}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); + } } From 83ba727ac49521e28e856761a75e6fee10893b56 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 3 Mar 2024 07:31:39 -0800 Subject: [PATCH 150/321] Fix a bug in memory write, and add debug info for memory usage Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 135 ++++++++++++++++++++++++- cpp/src/io/orc/reader_impl.hpp | 2 +- cpp/src/io/orc/reader_impl_chunking.cu | 4 + 3 files changed, 135 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index f7dbcc46282..e1086c2df30 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -814,6 +814,9 @@ void reader::impl::decompress_and_decode() auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + auto const load_stripe_start = + _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1].start_idx; + printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); auto const rows_to_skip = _file_itm_data.rows_to_skip; @@ -938,6 +941,13 @@ void reader::impl::decompress_and_decode() for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { printf("processing level = %d\n", (int)level); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); @@ -979,6 +989,13 @@ void reader::impl::decompress_and_decode() chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + const bool use_index = _config.use_index && // Do stripes have row group index @@ -1047,7 +1064,9 @@ void reader::impl::decompress_and_decode() CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - auto dst_base = static_cast(stripe_data[stripe_idx - stripe_start].data()); + // TODO: Wrong? + // stripe load_stripe_start? 
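+ // The offset is relative to load_stripe_start because stripe_data was
+ // allocated for the whole load chunk in load_data(); the decode chunk
+ // handled here may start partway into that allocation.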
+ auto dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data()); // printf("line %d\n", __LINE__); // fflush(stdout); @@ -1175,6 +1194,13 @@ void reader::impl::decompress_and_decode() // fflush(stdout); CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], stripe_chunk, @@ -1189,7 +1215,18 @@ void reader::impl::decompress_and_decode() level == 0, _stream); // stripe_data.clear(); - stripe_data.push_back(std::move(decomp_data)); + // stripe_data.push_back(std::move(decomp_data)); + stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); + for (int64_t i = 1; i < stripe_chunk.count; ++i) { + stripe_data[i + stripe_start - load_stripe_start] = {}; + } + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } // printf("line %d\n", __LINE__); // fflush(stdout); @@ -1217,6 +1254,13 @@ void reader::impl::decompress_and_decode() // printf("line %d\n", __LINE__); // fflush(stdout); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < num_stripes; ++j) { @@ -1231,14 +1275,35 @@ void reader::impl::decompress_and_decode() // printf(" create col, num rows: %d\n", (int)n_rows); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + // For list column, offset column will be always size + 1 if (is_list_type) n_rows++; _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } } // printf("line %d\n", __LINE__); // fflush(stdout); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + decode_stream_data(num_dict_entries, rows_to_skip, _metadata.get_row_index_stride(), @@ -1250,6 +1315,13 @@ void reader::impl::decompress_and_decode() _stream, _mr); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + // printf("line %d\n", __LINE__); // fflush(stdout); @@ -1286,6 +1358,13 @@ void reader::impl::decompress_and_decode() // fflush(stdout); } // end loop level + { + 
_stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + std::vector> out_columns; _out_metadata = get_meta_with_user_data(); std::transform( @@ -1300,6 +1379,13 @@ void reader::impl::decompress_and_decode() }); _chunk_read_data.decoded_table = std::make_unique
<table>(std::move(out_columns));

+  {
+    _stream.synchronize();
+    auto peak_mem = mem_stats_logger.peak_memory_usage();
+    std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "("
+              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+  }
+
   // printf("col: \n");
   // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view());

@@ -1322,6 +1408,13 @@
     printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count);
   }
   fflush(stdout);
+
+  {
+    _stream.synchronize();
+    auto peak_mem = mem_stats_logger.peak_memory_usage();
+    std::cout << "decomp and decode, peak_memory_usage: " << peak_mem << "("
+              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+  }
 }

 void reader::impl::prepare_data(int64_t skip_rows,
@@ -1361,6 +1454,13 @@ void reader::impl::prepare_data(int64_t skip_rows,

 table_with_metadata reader::impl::make_output_chunk()
 {
+  {
+    _stream.synchronize();
+    auto peak_mem = mem_stats_logger.peak_memory_usage();
+    std::cout << "start to make out, peak_memory_usage: " << peak_mem << "("
+              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+  }
+
   // There are no columns in the table.
   if (_selected_columns.num_levels() == 0) { return {std::make_unique<table>(), table_metadata{}}; }

@@ -1392,6 +1492,13 @@ table_with_metadata reader::impl::make_output_chunk()
       return std::move(_chunk_read_data.decoded_table);
     }

+    {
+      _stream.synchronize();
+      auto peak_mem = mem_stats_logger.peak_memory_usage();
+      std::cout << "prepare to make out, peak_memory_usage: " << peak_mem << "("
+                << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+    }
+
     auto const out_chunk =
       _chunk_read_data.output_table_chunks[_chunk_read_data.curr_output_table_chunk++];
     auto const out_tview =
       cudf::detail::slice(_chunk_read_data.decoded_table->view(),
                           {static_cast<size_type>(out_chunk.start_idx),
                            static_cast<size_type>(out_chunk.start_idx + out_chunk.count)},
                           _stream)[0];
+    {
+      _stream.synchronize();
+      auto peak_mem = mem_stats_logger.peak_memory_usage();
+      std::cout << "done make out, peak_memory_usage: " << peak_mem << "("
+                << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+    }
+
     return std::make_unique<table>
(out_tview, _stream, _mr); }(); #endif - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) - << " MB)" << std::endl; + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "done, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } return {std::move(out_table), _out_metadata}; } @@ -1517,6 +1634,14 @@ table_with_metadata reader::impl::read_chunk() { printf("==================call read chunk\n"); prepare_data(); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "done prepare data, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + return make_output_chunk(); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 3f93541aed5..d605975e1ad 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -46,7 +46,7 @@ class memory_stats_logger { [[nodiscard]] size_t peak_memory_usage() const noexcept { - return statistics_mr.get_bytes_counter().peak; + return statistics_mr.get_bytes_counter().value; } private: diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 41ffe3288c0..944f23e7764 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -778,6 +778,10 @@ void reader::impl::load_data() // lvl_stripe_data.clear(); // _file_itm_data.compinfo_ready = true; + + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "load, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } } // namespace cudf::io::orc::detail From 5dcd61242b8773d218116379a94209f73d64f4f8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 3 Mar 2024 09:24:56 -0800 Subject: [PATCH 151/321] Debugging memory leak Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index e1086c2df30..7412f9d2251 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1216,6 +1216,8 @@ void reader::impl::decompress_and_decode() _stream); // stripe_data.clear(); // stripe_data.push_back(std::move(decomp_data)); + + // TODO: only reset each one if the new size/type are different. stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); for (int64_t i = 1; i < stripe_chunk.count; ++i) { stripe_data[i + stripe_start - load_stripe_start] = {}; @@ -1261,6 +1263,17 @@ void reader::impl::decompress_and_decode() << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } + // TODO: do not clear but reset each one. + // and only reset if the new size/type are different. 
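+  // Clearing releases the per-level output buffers left over from the previous decode
+  // pass before they are rebuilt below.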
+ _out_buffers[level].clear(); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < num_stripes; ++j) { @@ -1289,7 +1302,8 @@ void reader::impl::decompress_and_decode() { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + std::cout << __LINE__ << ", buffer size: " << n_rows + << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } } @@ -1379,6 +1393,13 @@ void reader::impl::decompress_and_decode() }); _chunk_read_data.decoded_table = std::make_unique
(std::move(out_columns)); + // TODO: do not clear but reset each one. + // and only reset if the new size/type are different. + // This clear is just to check if there is memory leak. + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + _out_buffers[level].clear(); + } + { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); @@ -1489,6 +1510,7 @@ table_with_metadata reader::impl::make_output_chunk() auto out_table = [&] { if (_chunk_read_data.output_table_chunks.size() == 1) { _chunk_read_data.curr_output_table_chunk++; + printf("one chunk, no more table---------------------------------\n"); return std::move(_chunk_read_data.decoded_table); } @@ -1633,6 +1655,13 @@ bool reader::impl::has_next() table_with_metadata reader::impl::read_chunk() { printf("==================call read chunk\n"); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "\n\n\nstart read chunk, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + prepare_data(); { From 04acd0f381ca170a9ec267777c9705bd3a155d08 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 3 Mar 2024 10:18:24 -0800 Subject: [PATCH 152/321] Fix memory leak Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 7412f9d2251..5f6d02f43f1 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1398,6 +1398,16 @@ void reader::impl::decompress_and_decode() // This clear is just to check if there is memory leak. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { _out_buffers[level].clear(); + + auto& stripe_data = lvl_stripe_data[level]; + + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + stripe_data[stripe_start - load_stripe_start] = {}; + } else { + for (int64_t i = 0; i < stripe_chunk.count; ++i) { + stripe_data[i + stripe_start - load_stripe_start] = {}; + } + } } { @@ -1658,10 +1668,28 @@ table_with_metadata reader::impl::read_chunk() { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "\n\n\nstart read chunk, peak_memory_usage: " << peak_mem << "(" + std::cout << "\n\n\n------------start read chunk, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } + { + static int count{0}; + ++count; + +#if 0 + if (count == 3) { + _file_itm_data.lvl_stripe_data.clear(); + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "clear all, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + exit(0); + } +#endif + } + prepare_data(); { From 97f80c823eef8b79671523331ce77387ae4ba99b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 3 Mar 2024 19:33:36 -0800 Subject: [PATCH 153/321] Change comments Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 47bdb2427e8..c0cdeb1b107 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1158,7 +1158,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) auto const input = 
cudf::table_view{{*lists_col}};

   // Although we set `stripe_size_rows` to be very large, the writer only writes
-  // 250k rows per stripe. Thus, we have 200 stripes in total.
+  // 250k rows (top level) per stripe due to having nested types.
+  // Thus, we have 200 stripes in total.
   input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows);

   {
@@ -1240,7 +1241,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList)
   auto const input = cudf::table_view{{*lists_col, *str_col, *double_col}};

   // Although we set `stripe_size_rows` to be very large, the writer only writes
-  // 250k rows per stripe. Thus, we have 200 stripes in total.
+  // 250k rows (top level) per stripe due to having nested types.
+  // Thus, we have 200 stripes in total.
   input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows);

   {

From e425e416b51826eb4d53b7c8139099b7e5c64690 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Sun, 3 Mar 2024 19:33:42 -0800
Subject: [PATCH 154/321] Change memory stats

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index d605975e1ad..3f93541aed5 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -46,7 +46,7 @@ class memory_stats_logger {

   [[nodiscard]] size_t peak_memory_usage() const noexcept
   {
-    return statistics_mr.get_bytes_counter().value;
+    return statistics_mr.get_bytes_counter().peak;
   }

 private:

From 5dcd61242b8773d218116379a94209f73d64f4f8 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Sun, 3 Mar 2024 19:57:35 -0800
Subject: [PATCH 155/321] Change read limit ratio

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.hpp |  2 +-
 cpp/tests/io/orc_chunked_reader_test.cu | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 18fcbf25bdb..0769f46f1d1 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -212,7 +212,7 @@ struct chunk_read_data {
   std::size_t data_read_limit;       // approximate maximum size (in bytes) used to store
                                      // intermediate data, or 0 for no limit
   size_type output_row_granularity;  // TODO
-  static double constexpr load_limit_ratio{0.3};  // TODO
+  static double constexpr load_limit_ratio{0.4};  // TODO

   // Chunks of stripes that can be loaded into memory such that their data size is within a size
   // limit.
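To make the intent of this constant easier to follow: data_read_limit is a single byte
budget shared by the two passes of the chunked reader, and load_limit_ratio is the
fraction of that budget granted to loading raw stripe data, with the remainder reserved
for decompression and decode scratch. A minimal illustrative sketch of that split (the
helper below is hypothetical, not part of this patch):

  // Hypothetical helper, for illustration only.
  std::size_t load_limit(std::size_t data_read_limit)
  {
    if (data_read_limit == 0) { return 0; }  // 0 means "no limit" for either pass
    return static_cast<std::size_t>(static_cast<double>(data_read_limit) *
                                    chunk_read_data::load_limit_ratio);
  }

Raising the ratio from 0.3 to 0.4 therefore lets each load chunk hold more raw stripe
data at the cost of a smaller decode budget, which is what shifts the expected chunk
counts in the tests below.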
diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index c0cdeb1b107..7520a54e174 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1060,7 +1060,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) } { - int constexpr expected[] = {17, 10, 9}; + int constexpr expected[] = {10, 13, 10}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } @@ -1096,7 +1096,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) } { - int constexpr expected[] = {17, 50, 14}; + int constexpr expected[] = {10, 50, 15}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected); } @@ -1163,13 +1163,13 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); { - int constexpr expected[] = {2, 34, 2}; + int constexpr expected[] = {2, 40, 3}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{5 * 1024 * 1024UL}, expected); } { - int constexpr expected[] = {8, 34, 8}; + int constexpr expected[] = {8, 40, 9}; input_limit_test_read(__LINE__, test_files, input, @@ -1246,13 +1246,13 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); { - int constexpr expected[] = {11, 7, 5}; + int constexpr expected[] = {8, 8, 6}; input_limit_test_read( __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, expected); } { - int constexpr expected[] = {21, 13, 14}; + int constexpr expected[] = {16, 15, 17}; input_limit_test_read(__LINE__, test_files, input, From c4f98ee5c94c7fe82c1ef1979ff3cb32272c5307 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 3 Mar 2024 20:52:37 -0800 Subject: [PATCH 156/321] Test read with very large file Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 77 +++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 7520a54e174..b58ee3692d9 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1268,3 +1269,79 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); } } + +TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) +{ + using cudf::test::iterators::no_nulls; + + int64_t constexpr num_rows = 500'000'000l; + int constexpr rows_per_stripe = 1'000'000; + int constexpr num_reps = 5l; + int64_t constexpr total_rows = num_rows * num_reps; + static_assert(total_rows > std::numeric_limits::max()); + + auto const it = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 127; }); + auto const col = + cudf::test::fixed_width_column_wrapper( + it, it + num_rows); + auto const chunk_table = cudf::table_view{{col}}; + + std::vector data_buffer; + { + auto const write_opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{&data_buffer}) + .stripe_size_rows(rows_per_stripe) + .build(); + + auto writer = cudf::io::orc_chunked_writer(write_opts); + for (int i = 0; i < num_reps; ++i) { + writer.write(chunk_table); + } + } + + // Test reading the metadata + auto const metadata = + 
cudf::io::read_orc_metadata(cudf::io::source_info{data_buffer.data(), data_buffer.size()});
+  EXPECT_EQ(metadata.num_rows(), total_rows);
+  EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe);
+
+  printf("start test chunk\n");
+  fflush(stdout);
+
+  int constexpr num_rows_to_read = 5'000'000;
+  const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read;
+
+  // Check validity of the last 5 million rows.
+  const auto sequence_start = num_rows_to_skip % num_rows;
+  auto const skipped_col =
+    cudf::test::fixed_width_column_wrapper<int64_t>(
+      it + sequence_start, it + sequence_start + num_rows_to_read, no_nulls());
+  auto const expected = cudf::table_view{{skipped_col}};
+
+  auto const read_opts = cudf::io::orc_reader_options::builder(
+                           cudf::io::source_info{data_buffer.data(), data_buffer.size()})
+                           .use_index(false)
+                           .skip_rows(num_rows_to_skip)
+                           .build();
+  auto reader = cudf::io::chunked_orc_reader(
+    500'000UL /*output limit*/,
+    1'000'000UL /*input limit*/,
+    500'000 /*output granularity, or minimum number of rows for the output chunk*/,
+    read_opts);
+
+  auto num_chunks  = 0;
+  auto read_tables = std::vector<std::unique_ptr<cudf::table>>{};
+  auto tviews      = std::vector<cudf::table_view>{};
+
+  do {
+    auto chunk = reader.read_chunk();
+    ++num_chunks;
+    tviews.emplace_back(chunk.tbl->view());
+    read_tables.emplace_back(std::move(chunk.tbl));
+  } while (reader.has_next());
+
+  auto const read_result = cudf::concatenate(tviews);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view());
+
+  printf("num chunk: %d\n", num_chunks);
+}

From ae665a0ef81cca5423e1ddc543acbb8f41346390 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Sun, 3 Mar 2024 21:48:25 -0800
Subject: [PATCH 157/321] Support `skip_rows` and `num_rows`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu           | 11 ++++++++---
 cpp/src/io/orc/reader_impl.hpp          | 13 +++++++++----
 cpp/tests/io/orc_chunked_reader_test.cu |  8 +++++++-
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 5f6d02f43f1..a4b46bb49f4 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -1460,6 +1460,8 @@ void reader::impl::prepare_data(int64_t skip_rows,
   // There are no columns in the table.
if (_selected_columns.num_levels() == 0) { return; }

+  std::cout << "call global, skip = " << skip_rows << std::endl;
+
   global_preprocess(skip_rows, num_rows_opt, stripes);

   if (!_chunk_read_data.more_table_chunk_to_output()) {
@@ -1625,7 +1627,10 @@ reader::impl::impl(std::size_t output_size_limit,
   _config{options.get_timestamp_type(),
           options.is_enabled_use_index(),
           options.is_enabled_use_np_dtypes(),
-          options.get_decimal128_columns()},
+          options.get_decimal128_columns(),
+          options.get_skip_rows(),
+          options.get_num_rows(),
+          options.get_stripes()},
   _col_meta{std::make_unique<reader_column_meta>()},
   _sources(std::move(sources)),
   _metadata{_sources, stream},
@@ -1656,7 +1661,7 @@ table_with_metadata reader::impl::read(int64_t skip_rows,

 bool reader::impl::has_next()
 {
   printf("==================query has next \n");
-  prepare_data();
+  prepare_data(_config.skip_rows, _config.num_read_rows, _config.selected_stripes);

   printf("has next: %d\n", (int)_chunk_read_data.has_next());
   return _chunk_read_data.has_next();
@@ -1690,7 +1695,7 @@ table_with_metadata reader::impl::read_chunk()
 #endif
   }

-  prepare_data();
+  prepare_data(_config.skip_rows, _config.num_read_rows, _config.selected_stripes);

   {
     _stream.synchronize();

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 3f93541aed5..4a32394c91f 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -190,10 +190,15 @@ class reader::impl {

   // Reader configs
   struct {
-    data_type timestamp_type;  // Override output timestamp resolution
-    bool use_index;            // Enable or disable attempt to use row index for parsing
-    bool use_np_dtypes;        // Enable or disable the conversion to numpy-compatible dtypes
-    std::vector<std::string> decimal128_columns;  // Control decimals conversion
+    data_type timestamp_type;  // override output timestamp resolution
+    bool use_index;            // enable or disable attempt to use row index for parsing
+    bool use_np_dtypes;        // enable or disable the conversion to numpy-compatible dtypes
+    std::vector<std::string> decimal128_columns;  // control decimals conversion
+
+    // User specified reading rows/stripes selection.
+    uint64_t const skip_rows;
+    std::optional<int64_t> num_read_rows;
+    std::vector<std::vector<size_type>> const selected_stripes;
   } const _config;

   // Intermediate data for internal processing.

diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu
index b58ee3692d9..4a67cda8757 100644
--- a/cpp/tests/io/orc_chunked_reader_test.cu
+++ b/cpp/tests/io/orc_chunked_reader_test.cu
@@ -1270,11 +1270,15 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList)
   }
 }

+#define LOCAL_TEST
+
+// This test is extremely heavy, thus it should be disabled by default.
+#ifdef LOCAL_TEST
 TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow)
 {
   using cudf::test::iterators::no_nulls;

-  int64_t constexpr num_rows = 500'000'000l;
+  int64_t constexpr num_rows = 1'000'000'000l;
   int constexpr rows_per_stripe = 1'000'000;
   int constexpr num_reps = 5l;
   int64_t constexpr total_rows = num_rows * num_reps;
   static_assert(total_rows > std::numeric_limits<cudf::size_type>::max());
@@ -1310,6 +1314,7 @@
   int constexpr num_rows_to_read = 5'000'000;
   const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read;
+  // - 123456 /*just shift the read data region back by a random offset*/;

   // Check validity of the last 5 million rows.
const auto sequence_start = num_rows_to_skip % num_rows; @@ -1345,3 +1350,4 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) printf("num chunk: %d\n", num_chunks); } +#endif From 883ccc04afe9bf79285261e23e2525d35131efec Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 09:43:58 -0800 Subject: [PATCH 158/321] Fix test with very large file Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 27 +++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 4a67cda8757..326eaac73b9 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1277,17 +1277,16 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) { using cudf::test::iterators::no_nulls; + using int64s_col = cudf::test::fixed_width_column_wrapper; - int64_t constexpr num_rows = 1'000'000'000l; + int64_t constexpr num_rows = 500'000'000l; int constexpr rows_per_stripe = 1'000'000; - int constexpr num_reps = 5l; + int constexpr num_reps = 10l; int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 127; }); - auto const col = - cudf::test::fixed_width_column_wrapper( - it, it + num_rows); + auto const it = thrust::make_counting_iterator(int64_t{0}); + auto const col = int64s_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; @@ -1309,19 +1308,14 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) EXPECT_EQ(metadata.num_rows(), total_rows); EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); - printf("start test chunk\n"); - fflush(stdout); - int constexpr num_rows_to_read = 5'000'000; const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read; // - 123456 /*just shift the read data region back by a random offset*/; // Check validity of the last 5 million rows. 
const auto sequence_start = num_rows_to_skip % num_rows; - auto const skipped_col = - cudf::test::fixed_width_column_wrapper( - it + sequence_start, it + sequence_start + num_rows_to_read, no_nulls()); - auto const expected = cudf::table_view{{skipped_col}}; + auto const skipped_col = int64s_col(it + sequence_start, it + sequence_start + num_rows_to_read); + auto const expected = cudf::table_view{{skipped_col}}; auto const read_opts = cudf::io::orc_reader_options::builder( cudf::io::source_info{data_buffer.data(), data_buffer.size()}) @@ -1329,8 +1323,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .skip_rows(num_rows_to_skip) .build(); auto reader = cudf::io::chunked_orc_reader( - 500'000UL /*output limit*/, - 1'000'000UL /*input limit*/, + 600'000UL * sizeof(int64_t) /*output limit, equal to 600k int64_t rows */, + 8'000'000UL /*input limit, around size of 1 stripe's decoded data */, 500'000 /*output granularity, or minimum number of rows for the output chunk*/, read_opts); @@ -1346,8 +1340,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } while (reader.has_next()); auto const read_result = cudf::concatenate(tviews); + EXPECT_EQ(num_chunks, 10); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); - - printf("num chunk: %d\n", num_chunks); } #endif From 625d0f4826e333e32d9fb0a07801f028fb5cf556 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 10:28:23 -0800 Subject: [PATCH 159/321] Some refactors Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 93 +++++++++++++++++++++---- 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 326eaac73b9..13f0311ab1c 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -58,13 +58,14 @@ auto const temp_env = reinterpret_cast( using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; +using doubles_col = cudf::test::fixed_width_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; using int32s_lists_col = cudf::test::lists_column_wrapper; auto write_file(std::vector>& input_columns, std::string const& filename, - bool nullable, + bool nullable = false, std::size_t stripe_size_bytes = cudf::io::default_stripe_size_bytes, cudf::size_type stripe_size_rows = cudf::io::default_stripe_size_rows) { @@ -161,7 +162,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData) input_columns.emplace_back(int32s_col{}.release()); input_columns.emplace_back(int64s_col{}.release()); - auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty", false); + auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty"); auto const [result, num_chunks] = chunked_read(filepath, output_limit{1'000}); EXPECT_EQ(num_chunks, 1); EXPECT_EQ(result->num_rows(), 0); @@ -223,7 +224,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadBoundaryCases) std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); - return write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); + return write_file(input_columns, "chunked_read_simple_boundary"); }(); // Test with zero limit: everything will be read in one chunk. 
@@ -540,7 +541,7 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadWithListsNoNulls) input_columns.emplace_back( std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); - return write_file(input_columns, "chunked_read_with_lists_no_null", false /*nullable*/); + return write_file(input_columns, "chunked_read_with_lists_no_null"); }(); // Test with zero limit: everything will be read in one chunk. @@ -948,9 +949,8 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNullCount) auto const sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 1; }); auto const validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 4 != 3; }); - cudf::test::fixed_width_column_wrapper col{sequence, sequence + num_rows, validity}; std::vector> cols; - cols.push_back(col.release()); + cols.push_back(int32s_col{sequence, sequence + num_rows, validity}.release()); auto const expected = std::make_unique(std::move(cols)); auto const filepath = temp_env->get_temp_filepath("chunked_reader_null_count.orc"); @@ -1047,7 +1047,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn) { auto constexpr num_rows = 1'000'000; auto const iter1 = thrust::make_constant_iterator(15); - auto const col1 = cudf::test::fixed_width_column_wrapper(iter1, iter1 + num_rows); + auto const col1 = doubles_col(iter1, iter1 + num_rows); auto const filename = std::string{"single_col_fixed_width"}; auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); @@ -1072,10 +1072,10 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns) auto constexpr num_rows = 1'000'000; auto const iter1 = thrust::make_counting_iterator(0); - auto const col1 = cudf::test::fixed_width_column_wrapper(iter1, iter1 + num_rows); + auto const col1 = int32s_col(iter1, iter1 + num_rows); auto const iter2 = thrust::make_counting_iterator(0); - auto const col2 = cudf::test::fixed_width_column_wrapper(iter2, iter2 + num_rows); + auto const col2 = doubles_col(iter2, iter2 + num_rows); auto const strings = std::vector{"abc", "de", "fghi"}; auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { @@ -1270,15 +1270,77 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) } } +TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) +{ + int64_t constexpr num_rows = 100'000'000l; + int constexpr rows_per_stripe = 100'000; + + auto const it = thrust::make_counting_iterator(0); + auto const col = int32s_col(it, it + num_rows); + auto const input = cudf::table_view{{col}}; + + auto const filepath = temp_env->get_temp_filepath("chunk_read_with_row_selection.orc"); + auto const write_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input) + .stripe_size_rows(rows_per_stripe) + .build(); + cudf::io::write_orc(write_opts); + + // Verify metadata. + auto const metadata = cudf::io::read_orc_metadata(cudf::io::source_info{filepath}); + EXPECT_EQ(metadata.num_rows(), num_rows); + EXPECT_EQ(metadata.num_stripes(), num_rows / rows_per_stripe); + + int constexpr random_val = 123456; + + // Read some random number or rows that is not stripe size. + int constexpr num_rows_to_read = rows_per_stripe * 5 + random_val; + + // Just shift the read data region back by a random offset. 
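+  // The written column is an ascending sequence, so the expected chunk can be rebuilt
+  // directly from the skip offset computed below.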
+ const auto num_rows_to_skip = num_rows - num_rows_to_read - random_val; + + const auto sequence_start = num_rows_to_skip % num_rows; + auto const skipped_col = int32s_col(it + sequence_start, it + sequence_start + num_rows_to_read); + auto const expected = cudf::table_view{{skipped_col}}; + + auto const read_opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) + .use_index(false) + .skip_rows(num_rows_to_skip) + .num_rows(num_rows_to_read) + .build(); + + auto reader = cudf::io::chunked_orc_reader( + 60'000UL * sizeof(int) /*output limit, equal to 60k rows, less than rows in 1 stripe*/, + rows_per_stripe * sizeof(int) /*input limit, around size of 1 stripe's decoded data*/, + 50'000 /*output granularity, or minimum number of rows for the output chunk*/, + read_opts); + + auto num_chunks = 0; + auto read_tables = std::vector>{}; + auto tviews = std::vector{}; + + do { + auto chunk = reader.read_chunk(); + // Each output chunk should have either exactly 50k rows, or num_rows_to_read % 50k. + EXPECT_TRUE(chunk.tbl->num_rows() == 50000 || + chunk.tbl->num_rows() == num_rows_to_read % 50000); + + tviews.emplace_back(chunk.tbl->view()); + read_tables.emplace_back(std::move(chunk.tbl)); + ++num_chunks; + } while (reader.has_next()); + + auto const read_result = cudf::concatenate(tviews); + EXPECT_EQ(num_chunks, 13); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); +} + #define LOCAL_TEST // This test is extremely heavy, thus it should be disabled by default. #ifdef LOCAL_TEST TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) { - using cudf::test::iterators::no_nulls; - using int64s_col = cudf::test::fixed_width_column_wrapper; - int64_t constexpr num_rows = 500'000'000l; int constexpr rows_per_stripe = 1'000'000; int constexpr num_reps = 10l; @@ -1302,15 +1364,15 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } } - // Test reading the metadata + // Verify metadata. auto const metadata = cudf::io::read_orc_metadata(cudf::io::source_info{data_buffer.data(), data_buffer.size()}); EXPECT_EQ(metadata.num_rows(), total_rows); EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); int constexpr num_rows_to_read = 5'000'000; - const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read; - // - 123456 /*just shift the read data region back by a random offset*/; + const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read - + 123456 /*just shift the read data region back by a random offset*/; // Check validity of the last 5 million rows. 
const auto sequence_start = num_rows_to_skip % num_rows; @@ -1321,6 +1383,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) cudf::io::source_info{data_buffer.data(), data_buffer.size()}) .use_index(false) .skip_rows(num_rows_to_skip) + .num_rows(num_rows_to_read) .build(); auto reader = cudf::io::chunked_orc_reader( 600'000UL * sizeof(int64_t) /*output limit, equal to 600k int64_t rows */, From 974bb7faf25c33b21d8409158e5f80a07023560e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 11:06:52 -0800 Subject: [PATCH 160/321] Update debug info Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index a4b46bb49f4..1705369e7dd 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1552,11 +1552,18 @@ table_with_metadata reader::impl::make_output_chunk() #endif - { + if (!_chunk_read_data.has_next()) { + static int count{0}; + count++; _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "done, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + std::cout << "complete, " << count << ", peak_memory_usage: " << peak_mem + << " , MB = " << (peak_mem * 1.0) / (1024.0 * 1024.0) << std::endl; + } else { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "done, partial, peak_memory_usage: " << peak_mem + << " , MB = " << (peak_mem * 1.0) / (1024.0 * 1024.0) << std::endl; } return {std::move(out_table), _out_metadata}; From bdb586ead08f044119b93d55e654e00b7f31b575 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 11:06:59 -0800 Subject: [PATCH 161/321] Add a temporary test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 384 ++++++++++++++++++++++-- 1 file changed, 359 insertions(+), 25 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 13f0311ab1c..3a2ace205b4 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1182,7 +1182,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) { - int constexpr num_rows = 50'000'000; + int constexpr num_rows = 1'000'000; int constexpr list_size = 4; int constexpr str_size = 3; @@ -1241,33 +1241,367 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); auto const input = cudf::table_view{{*lists_col, *str_col, *double_col}}; - // Although we set `stripe_size_rows` to be very large, the writer only write - // 250k rows (top level) per stripe due to having nested type. - // Thus, we have 200 stripes in total. 
- input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); + for (int iters = 1; iters <= 100; ++iters) { + { + auto const write_opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{test_files[0]}) + .stripe_size_rows(cudf::io::default_stripe_size_rows) + .build(); + + auto writer = cudf::io::orc_chunked_writer(write_opts); + for (int i = 0; i < iters; ++i) { + writer.write(input); + } + } - { - int constexpr expected[] = {8, 8, 6}; - input_limit_test_read( - __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, expected); - } + // Although we set `stripe_size_rows` to be very large, the writer only write + // 250k rows (top level) per stripe due to having nested type. + // Thus, we have 200 stripes in total. + // input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); - { - int constexpr expected[] = {16, 15, 17}; - input_limit_test_read(__LINE__, - test_files, - input, - output_limit{128 * 1024 * 1024UL}, - input_limit{128 * 1024 * 1024UL}, - expected); - } + if (0) { + int constexpr expected[] = {8, 8, 6}; + auto const [result, num_chunks] = + chunked_read(test_files[0], output_limit{0UL}, input_limit{128 * 1024 * 1024UL}); + EXPECT_EQ(expected[0], num_chunks); + printf("num_chunks: %d\n", (int)num_chunks); - // TODO: remove - { - int constexpr expected[] = {1, 1, 1}; - input_limit_test_read( - __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); - } + // TODO: equal + // CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); + + // input_limit_test_read( + // __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, + // expected); + } + + // clang-format off + /* +complete, 1, peak_memory_usage: 24870400 , MB = 23.7183 +complete, 2, peak_memory_usage: 49739984 , MB = 47.4357 +complete, 3, peak_memory_usage: 74609600 , MB = 71.1533 +complete, 4, peak_memory_usage: 99479184 , MB = 94.8707 +complete, 5, peak_memory_usage: 124348528 , MB = 118.588 +complete, 6, peak_memory_usage: 149218128 , MB = 142.305 +complete, 7, peak_memory_usage: 174087728 , MB = 166.023 +complete, 8, peak_memory_usage: 198957312 , MB = 189.74 +complete, 9, peak_memory_usage: 223826672 , MB = 213.458 +complete, 10, peak_memory_usage: 248696256 , MB = 237.175 +complete, 11, peak_memory_usage: 224912432 , MB = 214.493 +complete, 12, peak_memory_usage: 225455472 , MB = 215.011 +complete, 13, peak_memory_usage: 225998192 , MB = 215.529 +complete, 14, peak_memory_usage: 226541072 , MB = 216.046 +complete, 15, peak_memory_usage: 227084080 , MB = 216.564 +complete, 16, peak_memory_usage: 227626832 , MB = 217.082 +complete, 17, peak_memory_usage: 228169712 , MB = 217.6 +complete, 18, peak_memory_usage: 228712592 , MB = 218.117 +complete, 19, peak_memory_usage: 248696256 , MB = 237.175 +complete, 20, peak_memory_usage: 229798352 , MB = 219.153 +complete, 21, peak_memory_usage: 230341600 , MB = 219.671 +complete, 22, peak_memory_usage: 230884096 , MB = 220.188 +complete, 23, peak_memory_usage: 231427152 , MB = 220.706 +complete, 24, peak_memory_usage: 231970080 , MB = 221.224 +complete, 25, peak_memory_usage: 232513136 , MB = 221.742 +complete, 26, peak_memory_usage: 233056016 , MB = 222.26 +complete, 27, peak_memory_usage: 233598624 , MB = 222.777 +complete, 28, peak_memory_usage: 248696256 , MB = 237.175 +complete, 29, peak_memory_usage: 234684480 , MB = 223.813 +complete, 30, peak_memory_usage: 235227984 , MB = 224.331 +complete, 31, peak_memory_usage: 235770208 , MB = 224.848 
+complete, 32, peak_memory_usage: 236313040 , MB = 225.366 +complete, 33, peak_memory_usage: 236855888 , MB = 225.883 +complete, 34, peak_memory_usage: 237399504 , MB = 226.402 +complete, 35, peak_memory_usage: 237941776 , MB = 226.919 +complete, 36, peak_memory_usage: 238485504 , MB = 227.438 +complete, 37, peak_memory_usage: 248696256 , MB = 237.175 +complete, 38, peak_memory_usage: 239570400 , MB = 228.472 +complete, 39, peak_memory_usage: 240113728 , MB = 228.99 +complete, 40, peak_memory_usage: 240656512 , MB = 229.508 +complete, 41, peak_memory_usage: 241198848 , MB = 230.025 +complete, 42, peak_memory_usage: 241742608 , MB = 230.544 +complete, 43, peak_memory_usage: 242285536 , MB = 231.061 +complete, 44, peak_memory_usage: 242828368 , MB = 231.579 +complete, 45, peak_memory_usage: 243371008 , MB = 232.097 +complete, 46, peak_memory_usage: 248696256 , MB = 237.175 +complete, 47, peak_memory_usage: 244456448 , MB = 233.132 +complete, 48, peak_memory_usage: 245000016 , MB = 233.65 +complete, 49, peak_memory_usage: 245542256 , MB = 234.167 +complete, 50, peak_memory_usage: 246085472 , MB = 234.685 +complete, 51, peak_memory_usage: 246628768 , MB = 235.204 +complete, 52, peak_memory_usage: 247171088 , MB = 235.721 +complete, 53, peak_memory_usage: 247714240 , MB = 236.239 +complete, 54, peak_memory_usage: 248257248 , MB = 236.757 +complete, 55, peak_memory_usage: 248799808 , MB = 237.274 +complete, 56, peak_memory_usage: 249342880 , MB = 237.792 +complete, 57, peak_memory_usage: 249885808 , MB = 238.31 +complete, 58, peak_memory_usage: 250428960 , MB = 238.828 +complete, 59, peak_memory_usage: 250971984 , MB = 239.346 +complete, 60, peak_memory_usage: 251514080 , MB = 239.863 +complete, 61, peak_memory_usage: 252057616 , MB = 240.381 +complete, 62, peak_memory_usage: 252599968 , MB = 240.898 +complete, 63, peak_memory_usage: 253142992 , MB = 241.416 +complete, 64, peak_memory_usage: 253686064 , MB = 241.934 +complete, 65, peak_memory_usage: 254227872 , MB = 242.451 +complete, 66, peak_memory_usage: 254771152 , MB = 242.969 +complete, 67, peak_memory_usage: 255313872 , MB = 243.486 +complete, 68, peak_memory_usage: 255856912 , MB = 244.004 +complete, 69, peak_memory_usage: 256400048 , MB = 244.522 +complete, 70, peak_memory_usage: 256943040 , MB = 245.04 +complete, 71, peak_memory_usage: 257485520 , MB = 245.557 +complete, 72, peak_memory_usage: 258029520 , MB = 246.076 +complete, 73, peak_memory_usage: 258572064 , MB = 246.594 +complete, 74, peak_memory_usage: 259115328 , MB = 247.112 +complete, 75, peak_memory_usage: 259657776 , MB = 247.629 +complete, 76, peak_memory_usage: 260200864 , MB = 248.147 +complete, 77, peak_memory_usage: 260742832 , MB = 248.664 +complete, 78, peak_memory_usage: 261286496 , MB = 249.182 +complete, 79, peak_memory_usage: 261828432 , MB = 249.699 +complete, 80, peak_memory_usage: 262371920 , MB = 250.217 +complete, 81, peak_memory_usage: 262914432 , MB = 250.735 +complete, 82, peak_memory_usage: 263458960 , MB = 251.254 +complete, 83, peak_memory_usage: 264000816 , MB = 251.771 +complete, 84, peak_memory_usage: 264543056 , MB = 252.288 +complete, 85, peak_memory_usage: 265085984 , MB = 252.806 +complete, 86, peak_memory_usage: 265630256 , MB = 253.325 +complete, 87, peak_memory_usage: 266171696 , MB = 253.841 +complete, 88, peak_memory_usage: 266714432 , MB = 254.359 +complete, 89, peak_memory_usage: 267257392 , MB = 254.877 +complete, 90, peak_memory_usage: 267800176 , MB = 255.394 +complete, 91, peak_memory_usage: 268343536 , MB = 255.912 +complete, 92, 
peak_memory_usage: 268886256 , MB = 256.43 +complete, 93, peak_memory_usage: 269429968 , MB = 256.948 +complete, 94, peak_memory_usage: 269971904 , MB = 257.465 +complete, 95, peak_memory_usage: 270516528 , MB = 257.985 +complete, 96, peak_memory_usage: 271058992 , MB = 258.502 +complete, 97, peak_memory_usage: 271601616 , MB = 259.019 +complete, 98, peak_memory_usage: 272145536 , MB = 259.538 +complete, 99, peak_memory_usage: 272686496 , MB = 260.054 +complete, 100, peak_memory_usage: 273230448 , MB = 260.573 + +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 1 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 2 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 3 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 4 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 5 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 6 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 7 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 8 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 9 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 10 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 +num_chunks: 11 + + */ + // clang-format on + + printf("\n\n\n\n read full\n"); + fflush(stdout); + // TODO: remove + { + int constexpr expected[] = {1, 1, 1}; + // input_limit_test_read( + // __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); + auto const [result, num_chunks] = + chunked_read(test_files[0], output_limit{0UL}, input_limit{0UL}); + EXPECT_EQ(expected[0], num_chunks); + printf("num_chunks: %d\n", (int)num_chunks); + // TODO: equal + // CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); + } + + // clang-format off +/* +complete, 1, peak_memory_usage: 24870400 , MB = 23.7183 +complete, 2, peak_memory_usage: 49739984 , MB = 47.4357 +complete, 3, peak_memory_usage: 74609600 , MB = 71.1533 +complete, 4, peak_memory_usage: 99479184 , MB = 94.8707 +complete, 5, peak_memory_usage: 124348528 , MB = 118.588 +complete, 6, peak_memory_usage: 149218128 , MB = 142.305 +complete, 7, peak_memory_usage: 174087728 , MB = 166.023 +complete, 8, peak_memory_usage: 198957312 , MB = 189.74 +complete, 9, peak_memory_usage: 223826672 , MB = 213.458 +complete, 10, peak_memory_usage: 248696256 , MB = 237.175 +complete, 11, peak_memory_usage: 273565872 , MB = 260.893 +complete, 12, peak_memory_usage: 298435456 , MB = 284.61 +complete, 13, peak_memory_usage: 323304800 , MB = 308.327 +complete, 14, peak_memory_usage: 348174400 , MB = 332.045 +complete, 15, peak_memory_usage: 373044000 , MB = 355.762 +complete, 16, peak_memory_usage: 397913584 , MB = 379.48 +complete, 17, peak_memory_usage: 
422782944 , MB = 403.197 +complete, 18, peak_memory_usage: 447652528 , MB = 426.915 +complete, 19, peak_memory_usage: 472522144 , MB = 450.632 +complete, 20, peak_memory_usage: 497391728 , MB = 474.35 +complete, 21, peak_memory_usage: 522261072 , MB = 498.067 +complete, 22, peak_memory_usage: 547130672 , MB = 521.784 +complete, 23, peak_memory_usage: 572000272 , MB = 545.502 +complete, 24, peak_memory_usage: 596869856 , MB = 569.219 +complete, 25, peak_memory_usage: 621739216 , MB = 592.937 +complete, 26, peak_memory_usage: 646608800 , MB = 616.654 +complete, 27, peak_memory_usage: 671478416 , MB = 640.372 +complete, 28, peak_memory_usage: 696348000 , MB = 664.089 +complete, 29, peak_memory_usage: 721217344 , MB = 687.806 +complete, 30, peak_memory_usage: 746086944 , MB = 711.524 +complete, 31, peak_memory_usage: 770956544 , MB = 735.241 +complete, 32, peak_memory_usage: 795826128 , MB = 758.959 +complete, 33, peak_memory_usage: 820695488 , MB = 782.676 +complete, 34, peak_memory_usage: 845565072 , MB = 806.394 +complete, 35, peak_memory_usage: 870434688 , MB = 830.111 +complete, 36, peak_memory_usage: 895304272 , MB = 853.829 +complete, 37, peak_memory_usage: 920173616 , MB = 877.546 +complete, 38, peak_memory_usage: 945043216 , MB = 901.263 +complete, 39, peak_memory_usage: 969912816 , MB = 924.981 +complete, 40, peak_memory_usage: 994782400 , MB = 948.698 +complete, 41, peak_memory_usage: 1019651760 , MB = 972.416 +complete, 42, peak_memory_usage: 1044521344 , MB = 996.133 +complete, 43, peak_memory_usage: 1069390960 , MB = 1019.85 +complete, 44, peak_memory_usage: 1094260544 , MB = 1043.57 +complete, 45, peak_memory_usage: 1119129888 , MB = 1067.29 +complete, 46, peak_memory_usage: 1143999488 , MB = 1091 +complete, 47, peak_memory_usage: 1168869088 , MB = 1114.72 +complete, 48, peak_memory_usage: 1193738672 , MB = 1138.44 +complete, 49, peak_memory_usage: 1218608032 , MB = 1162.16 +complete, 50, peak_memory_usage: 1243477616 , MB = 1185.87 +complete, 51, peak_memory_usage: 1268347232 , MB = 1209.59 +complete, 52, peak_memory_usage: 1293216816 , MB = 1233.31 +complete, 53, peak_memory_usage: 1318086160 , MB = 1257.02 +complete, 54, peak_memory_usage: 1342955760 , MB = 1280.74 +complete, 55, peak_memory_usage: 1367825360 , MB = 1304.46 +complete, 56, peak_memory_usage: 1392694944 , MB = 1328.18 +complete, 57, peak_memory_usage: 1417564560 , MB = 1351.89 +complete, 58, peak_memory_usage: 1442433888 , MB = 1375.61 +complete, 59, peak_memory_usage: 1467303504 , MB = 1399.33 +complete, 60, peak_memory_usage: 1492173088 , MB = 1423.05 +complete, 61, peak_memory_usage: 1517042688 , MB = 1446.76 +complete, 62, peak_memory_usage: 1541912032 , MB = 1470.48 +complete, 63, peak_memory_usage: 1566781632 , MB = 1494.2 +complete, 64, peak_memory_usage: 1591651216 , MB = 1517.92 +complete, 65, peak_memory_usage: 1616520832 , MB = 1541.63 +complete, 66, peak_memory_usage: 1641390160 , MB = 1565.35 +complete, 67, peak_memory_usage: 1666259776 , MB = 1589.07 +complete, 68, peak_memory_usage: 1691129360 , MB = 1612.79 +complete, 69, peak_memory_usage: 1715998960 , MB = 1636.5 +complete, 70, peak_memory_usage: 1740868304 , MB = 1660.22 +complete, 71, peak_memory_usage: 1765737904 , MB = 1683.94 +complete, 72, peak_memory_usage: 1790607488 , MB = 1707.66 +complete, 73, peak_memory_usage: 1815477104 , MB = 1731.37 +complete, 74, peak_memory_usage: 1840346432 , MB = 1755.09 +complete, 75, peak_memory_usage: 1865216048 , MB = 1778.81 +complete, 76, peak_memory_usage: 1890085632 , MB = 1802.53 +complete, 77, 
peak_memory_usage: 1914955232 , MB = 1826.24 +complete, 78, peak_memory_usage: 1939824576 , MB = 1849.96 +complete, 79, peak_memory_usage: 1964694176 , MB = 1873.68 +complete, 80, peak_memory_usage: 1989563760 , MB = 1897.4 +complete, 81, peak_memory_usage: 2014433376 , MB = 1921.11 +complete, 82, peak_memory_usage: 2039302704 , MB = 1944.83 +complete, 83, peak_memory_usage: 2064172320 , MB = 1968.55 +complete, 84, peak_memory_usage: 2089041904 , MB = 1992.27 +complete, 85, peak_memory_usage: 2113911504 , MB = 2015.98 +complete, 86, peak_memory_usage: 2138780848 , MB = 2039.7 +complete, 87, peak_memory_usage: 2163650448 , MB = 2063.42 +complete, 88, peak_memory_usage: 2188520032 , MB = 2087.14 +complete, 89, peak_memory_usage: 2213389648 , MB = 2110.85 +complete, 90, peak_memory_usage: 2238258976 , MB = 2134.57 +complete, 91, peak_memory_usage: 2263128592 , MB = 2158.29 +complete, 92, peak_memory_usage: 2287998176 , MB = 2182.01 +complete, 93, peak_memory_usage: 2312867776 , MB = 2205.72 +complete, 94, peak_memory_usage: 2337737120 , MB = 2229.44 +complete, 95, peak_memory_usage: 2362606720 , MB = 2253.16 +complete, 96, peak_memory_usage: 2387476304 , MB = 2276.87 +complete, 97, peak_memory_usage: 2412345920 , MB = 2300.59 +complete, 98, peak_memory_usage: 2437215248 , MB = 2324.31 +complete, 99, peak_memory_usage: 2462084864 , MB = 2348.03 +complete, 100, peak_memory_usage: 2486954448 , MB = 2371.74 +*/ + // clang-format on + + } // end iters } TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) From 1bee174cd9f297c30f2034b4b246588a5a0514ce Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 11:07:05 -0800 Subject: [PATCH 162/321] Revert "Add a temporary test" This reverts commit bdb586ead08f044119b93d55e654e00b7f31b575. --- cpp/tests/io/orc_chunked_reader_test.cu | 384 ++---------------------- 1 file changed, 25 insertions(+), 359 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 3a2ace205b4..13f0311ab1c 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1182,7 +1182,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType) TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) { - int constexpr num_rows = 1'000'000; + int constexpr num_rows = 50'000'000; int constexpr list_size = 4; int constexpr str_size = 3; @@ -1241,367 +1241,33 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) auto const test_files = input_limit_get_test_names(temp_env->get_temp_filepath(filename)); auto const input = cudf::table_view{{*lists_col, *str_col, *double_col}}; - for (int iters = 1; iters <= 100; ++iters) { - { - auto const write_opts = - cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{test_files[0]}) - .stripe_size_rows(cudf::io::default_stripe_size_rows) - .build(); - - auto writer = cudf::io::orc_chunked_writer(write_opts); - for (int i = 0; i < iters; ++i) { - writer.write(input); - } - } - - // Although we set `stripe_size_rows` to be very large, the writer only write - // 250k rows (top level) per stripe due to having nested type. - // Thus, we have 200 stripes in total. 
- // input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); - - if (0) { - int constexpr expected[] = {8, 8, 6}; - auto const [result, num_chunks] = - chunked_read(test_files[0], output_limit{0UL}, input_limit{128 * 1024 * 1024UL}); - EXPECT_EQ(expected[0], num_chunks); - printf("num_chunks: %d\n", (int)num_chunks); - - // TODO: equal - // CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); + // Although we set `stripe_size_rows` to be very large, the writer only write + // 250k rows (top level) per stripe due to having nested type. + // Thus, we have 200 stripes in total. + input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows); - // input_limit_test_read( - // __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, - // expected); - } + { + int constexpr expected[] = {8, 8, 6}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, expected); + } - // clang-format off - /* -complete, 1, peak_memory_usage: 24870400 , MB = 23.7183 -complete, 2, peak_memory_usage: 49739984 , MB = 47.4357 -complete, 3, peak_memory_usage: 74609600 , MB = 71.1533 -complete, 4, peak_memory_usage: 99479184 , MB = 94.8707 -complete, 5, peak_memory_usage: 124348528 , MB = 118.588 -complete, 6, peak_memory_usage: 149218128 , MB = 142.305 -complete, 7, peak_memory_usage: 174087728 , MB = 166.023 -complete, 8, peak_memory_usage: 198957312 , MB = 189.74 -complete, 9, peak_memory_usage: 223826672 , MB = 213.458 -complete, 10, peak_memory_usage: 248696256 , MB = 237.175 -complete, 11, peak_memory_usage: 224912432 , MB = 214.493 -complete, 12, peak_memory_usage: 225455472 , MB = 215.011 -complete, 13, peak_memory_usage: 225998192 , MB = 215.529 -complete, 14, peak_memory_usage: 226541072 , MB = 216.046 -complete, 15, peak_memory_usage: 227084080 , MB = 216.564 -complete, 16, peak_memory_usage: 227626832 , MB = 217.082 -complete, 17, peak_memory_usage: 228169712 , MB = 217.6 -complete, 18, peak_memory_usage: 228712592 , MB = 218.117 -complete, 19, peak_memory_usage: 248696256 , MB = 237.175 -complete, 20, peak_memory_usage: 229798352 , MB = 219.153 -complete, 21, peak_memory_usage: 230341600 , MB = 219.671 -complete, 22, peak_memory_usage: 230884096 , MB = 220.188 -complete, 23, peak_memory_usage: 231427152 , MB = 220.706 -complete, 24, peak_memory_usage: 231970080 , MB = 221.224 -complete, 25, peak_memory_usage: 232513136 , MB = 221.742 -complete, 26, peak_memory_usage: 233056016 , MB = 222.26 -complete, 27, peak_memory_usage: 233598624 , MB = 222.777 -complete, 28, peak_memory_usage: 248696256 , MB = 237.175 -complete, 29, peak_memory_usage: 234684480 , MB = 223.813 -complete, 30, peak_memory_usage: 235227984 , MB = 224.331 -complete, 31, peak_memory_usage: 235770208 , MB = 224.848 -complete, 32, peak_memory_usage: 236313040 , MB = 225.366 -complete, 33, peak_memory_usage: 236855888 , MB = 225.883 -complete, 34, peak_memory_usage: 237399504 , MB = 226.402 -complete, 35, peak_memory_usage: 237941776 , MB = 226.919 -complete, 36, peak_memory_usage: 238485504 , MB = 227.438 -complete, 37, peak_memory_usage: 248696256 , MB = 237.175 -complete, 38, peak_memory_usage: 239570400 , MB = 228.472 -complete, 39, peak_memory_usage: 240113728 , MB = 228.99 -complete, 40, peak_memory_usage: 240656512 , MB = 229.508 -complete, 41, peak_memory_usage: 241198848 , MB = 230.025 -complete, 42, peak_memory_usage: 241742608 , MB = 230.544 -complete, 43, peak_memory_usage: 242285536 , MB = 231.061 -complete, 44, 
peak_memory_usage: 242828368 , MB = 231.579 -complete, 45, peak_memory_usage: 243371008 , MB = 232.097 -complete, 46, peak_memory_usage: 248696256 , MB = 237.175 -complete, 47, peak_memory_usage: 244456448 , MB = 233.132 -complete, 48, peak_memory_usage: 245000016 , MB = 233.65 -complete, 49, peak_memory_usage: 245542256 , MB = 234.167 -complete, 50, peak_memory_usage: 246085472 , MB = 234.685 -complete, 51, peak_memory_usage: 246628768 , MB = 235.204 -complete, 52, peak_memory_usage: 247171088 , MB = 235.721 -complete, 53, peak_memory_usage: 247714240 , MB = 236.239 -complete, 54, peak_memory_usage: 248257248 , MB = 236.757 -complete, 55, peak_memory_usage: 248799808 , MB = 237.274 -complete, 56, peak_memory_usage: 249342880 , MB = 237.792 -complete, 57, peak_memory_usage: 249885808 , MB = 238.31 -complete, 58, peak_memory_usage: 250428960 , MB = 238.828 -complete, 59, peak_memory_usage: 250971984 , MB = 239.346 -complete, 60, peak_memory_usage: 251514080 , MB = 239.863 -complete, 61, peak_memory_usage: 252057616 , MB = 240.381 -complete, 62, peak_memory_usage: 252599968 , MB = 240.898 -complete, 63, peak_memory_usage: 253142992 , MB = 241.416 -complete, 64, peak_memory_usage: 253686064 , MB = 241.934 -complete, 65, peak_memory_usage: 254227872 , MB = 242.451 -complete, 66, peak_memory_usage: 254771152 , MB = 242.969 -complete, 67, peak_memory_usage: 255313872 , MB = 243.486 -complete, 68, peak_memory_usage: 255856912 , MB = 244.004 -complete, 69, peak_memory_usage: 256400048 , MB = 244.522 -complete, 70, peak_memory_usage: 256943040 , MB = 245.04 -complete, 71, peak_memory_usage: 257485520 , MB = 245.557 -complete, 72, peak_memory_usage: 258029520 , MB = 246.076 -complete, 73, peak_memory_usage: 258572064 , MB = 246.594 -complete, 74, peak_memory_usage: 259115328 , MB = 247.112 -complete, 75, peak_memory_usage: 259657776 , MB = 247.629 -complete, 76, peak_memory_usage: 260200864 , MB = 248.147 -complete, 77, peak_memory_usage: 260742832 , MB = 248.664 -complete, 78, peak_memory_usage: 261286496 , MB = 249.182 -complete, 79, peak_memory_usage: 261828432 , MB = 249.699 -complete, 80, peak_memory_usage: 262371920 , MB = 250.217 -complete, 81, peak_memory_usage: 262914432 , MB = 250.735 -complete, 82, peak_memory_usage: 263458960 , MB = 251.254 -complete, 83, peak_memory_usage: 264000816 , MB = 251.771 -complete, 84, peak_memory_usage: 264543056 , MB = 252.288 -complete, 85, peak_memory_usage: 265085984 , MB = 252.806 -complete, 86, peak_memory_usage: 265630256 , MB = 253.325 -complete, 87, peak_memory_usage: 266171696 , MB = 253.841 -complete, 88, peak_memory_usage: 266714432 , MB = 254.359 -complete, 89, peak_memory_usage: 267257392 , MB = 254.877 -complete, 90, peak_memory_usage: 267800176 , MB = 255.394 -complete, 91, peak_memory_usage: 268343536 , MB = 255.912 -complete, 92, peak_memory_usage: 268886256 , MB = 256.43 -complete, 93, peak_memory_usage: 269429968 , MB = 256.948 -complete, 94, peak_memory_usage: 269971904 , MB = 257.465 -complete, 95, peak_memory_usage: 270516528 , MB = 257.985 -complete, 96, peak_memory_usage: 271058992 , MB = 258.502 -complete, 97, peak_memory_usage: 271601616 , MB = 259.019 -complete, 98, peak_memory_usage: 272145536 , MB = 259.538 -complete, 99, peak_memory_usage: 272686496 , MB = 260.054 -complete, 100, peak_memory_usage: 273230448 , MB = 260.573 - -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 1 -num_chunks: 2 -num_chunks: 2 -num_chunks: 2 -num_chunks: 
2 -num_chunks: 2 -num_chunks: 2 -num_chunks: 2 -num_chunks: 2 -num_chunks: 2 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 3 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 4 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 5 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 6 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 7 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 8 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 9 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 10 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 -num_chunks: 11 - - */ - // clang-format on - - printf("\n\n\n\n read full\n"); - fflush(stdout); - // TODO: remove - { - int constexpr expected[] = {1, 1, 1}; - // input_limit_test_read( - // __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); - auto const [result, num_chunks] = - chunked_read(test_files[0], output_limit{0UL}, input_limit{0UL}); - EXPECT_EQ(expected[0], num_chunks); - printf("num_chunks: %d\n", (int)num_chunks); - // TODO: equal - // CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result, input); - } + { + int constexpr expected[] = {16, 15, 17}; + input_limit_test_read(__LINE__, + test_files, + input, + output_limit{128 * 1024 * 1024UL}, + input_limit{128 * 1024 * 1024UL}, + expected); + } - // clang-format off -/* -complete, 1, peak_memory_usage: 24870400 , MB = 23.7183 -complete, 2, peak_memory_usage: 49739984 , MB = 47.4357 -complete, 3, peak_memory_usage: 74609600 , MB = 71.1533 -complete, 4, peak_memory_usage: 99479184 , MB = 94.8707 -complete, 5, peak_memory_usage: 124348528 , MB = 118.588 -complete, 6, peak_memory_usage: 149218128 , MB = 142.305 -complete, 7, peak_memory_usage: 174087728 , MB = 166.023 -complete, 8, peak_memory_usage: 198957312 , MB = 189.74 -complete, 9, peak_memory_usage: 223826672 , MB = 213.458 -complete, 10, peak_memory_usage: 248696256 , MB = 237.175 -complete, 11, peak_memory_usage: 273565872 , MB = 260.893 -complete, 12, peak_memory_usage: 298435456 , MB = 284.61 -complete, 13, peak_memory_usage: 323304800 , MB = 308.327 -complete, 14, peak_memory_usage: 348174400 , MB = 332.045 -complete, 15, peak_memory_usage: 373044000 , MB = 355.762 -complete, 16, peak_memory_usage: 397913584 , MB = 379.48 -complete, 17, peak_memory_usage: 422782944 , MB = 403.197 -complete, 18, peak_memory_usage: 447652528 , MB = 426.915 -complete, 19, peak_memory_usage: 472522144 , MB = 450.632 -complete, 20, peak_memory_usage: 497391728 , MB = 474.35 -complete, 21, peak_memory_usage: 522261072 , MB = 498.067 -complete, 22, peak_memory_usage: 547130672 , MB = 521.784 -complete, 23, peak_memory_usage: 572000272 , MB = 545.502 -complete, 24, peak_memory_usage: 596869856 , MB = 569.219 -complete, 25, peak_memory_usage: 621739216 , MB = 592.937 -complete, 26, peak_memory_usage: 646608800 , 
MB = 616.654 -complete, 27, peak_memory_usage: 671478416 , MB = 640.372 -complete, 28, peak_memory_usage: 696348000 , MB = 664.089 -complete, 29, peak_memory_usage: 721217344 , MB = 687.806 -complete, 30, peak_memory_usage: 746086944 , MB = 711.524 -complete, 31, peak_memory_usage: 770956544 , MB = 735.241 -complete, 32, peak_memory_usage: 795826128 , MB = 758.959 -complete, 33, peak_memory_usage: 820695488 , MB = 782.676 -complete, 34, peak_memory_usage: 845565072 , MB = 806.394 -complete, 35, peak_memory_usage: 870434688 , MB = 830.111 -complete, 36, peak_memory_usage: 895304272 , MB = 853.829 -complete, 37, peak_memory_usage: 920173616 , MB = 877.546 -complete, 38, peak_memory_usage: 945043216 , MB = 901.263 -complete, 39, peak_memory_usage: 969912816 , MB = 924.981 -complete, 40, peak_memory_usage: 994782400 , MB = 948.698 -complete, 41, peak_memory_usage: 1019651760 , MB = 972.416 -complete, 42, peak_memory_usage: 1044521344 , MB = 996.133 -complete, 43, peak_memory_usage: 1069390960 , MB = 1019.85 -complete, 44, peak_memory_usage: 1094260544 , MB = 1043.57 -complete, 45, peak_memory_usage: 1119129888 , MB = 1067.29 -complete, 46, peak_memory_usage: 1143999488 , MB = 1091 -complete, 47, peak_memory_usage: 1168869088 , MB = 1114.72 -complete, 48, peak_memory_usage: 1193738672 , MB = 1138.44 -complete, 49, peak_memory_usage: 1218608032 , MB = 1162.16 -complete, 50, peak_memory_usage: 1243477616 , MB = 1185.87 -complete, 51, peak_memory_usage: 1268347232 , MB = 1209.59 -complete, 52, peak_memory_usage: 1293216816 , MB = 1233.31 -complete, 53, peak_memory_usage: 1318086160 , MB = 1257.02 -complete, 54, peak_memory_usage: 1342955760 , MB = 1280.74 -complete, 55, peak_memory_usage: 1367825360 , MB = 1304.46 -complete, 56, peak_memory_usage: 1392694944 , MB = 1328.18 -complete, 57, peak_memory_usage: 1417564560 , MB = 1351.89 -complete, 58, peak_memory_usage: 1442433888 , MB = 1375.61 -complete, 59, peak_memory_usage: 1467303504 , MB = 1399.33 -complete, 60, peak_memory_usage: 1492173088 , MB = 1423.05 -complete, 61, peak_memory_usage: 1517042688 , MB = 1446.76 -complete, 62, peak_memory_usage: 1541912032 , MB = 1470.48 -complete, 63, peak_memory_usage: 1566781632 , MB = 1494.2 -complete, 64, peak_memory_usage: 1591651216 , MB = 1517.92 -complete, 65, peak_memory_usage: 1616520832 , MB = 1541.63 -complete, 66, peak_memory_usage: 1641390160 , MB = 1565.35 -complete, 67, peak_memory_usage: 1666259776 , MB = 1589.07 -complete, 68, peak_memory_usage: 1691129360 , MB = 1612.79 -complete, 69, peak_memory_usage: 1715998960 , MB = 1636.5 -complete, 70, peak_memory_usage: 1740868304 , MB = 1660.22 -complete, 71, peak_memory_usage: 1765737904 , MB = 1683.94 -complete, 72, peak_memory_usage: 1790607488 , MB = 1707.66 -complete, 73, peak_memory_usage: 1815477104 , MB = 1731.37 -complete, 74, peak_memory_usage: 1840346432 , MB = 1755.09 -complete, 75, peak_memory_usage: 1865216048 , MB = 1778.81 -complete, 76, peak_memory_usage: 1890085632 , MB = 1802.53 -complete, 77, peak_memory_usage: 1914955232 , MB = 1826.24 -complete, 78, peak_memory_usage: 1939824576 , MB = 1849.96 -complete, 79, peak_memory_usage: 1964694176 , MB = 1873.68 -complete, 80, peak_memory_usage: 1989563760 , MB = 1897.4 -complete, 81, peak_memory_usage: 2014433376 , MB = 1921.11 -complete, 82, peak_memory_usage: 2039302704 , MB = 1944.83 -complete, 83, peak_memory_usage: 2064172320 , MB = 1968.55 -complete, 84, peak_memory_usage: 2089041904 , MB = 1992.27 -complete, 85, peak_memory_usage: 2113911504 , MB = 2015.98 -complete, 86, 
peak_memory_usage: 2138780848 , MB = 2039.7 -complete, 87, peak_memory_usage: 2163650448 , MB = 2063.42 -complete, 88, peak_memory_usage: 2188520032 , MB = 2087.14 -complete, 89, peak_memory_usage: 2213389648 , MB = 2110.85 -complete, 90, peak_memory_usage: 2238258976 , MB = 2134.57 -complete, 91, peak_memory_usage: 2263128592 , MB = 2158.29 -complete, 92, peak_memory_usage: 2287998176 , MB = 2182.01 -complete, 93, peak_memory_usage: 2312867776 , MB = 2205.72 -complete, 94, peak_memory_usage: 2337737120 , MB = 2229.44 -complete, 95, peak_memory_usage: 2362606720 , MB = 2253.16 -complete, 96, peak_memory_usage: 2387476304 , MB = 2276.87 -complete, 97, peak_memory_usage: 2412345920 , MB = 2300.59 -complete, 98, peak_memory_usage: 2437215248 , MB = 2324.31 -complete, 99, peak_memory_usage: 2462084864 , MB = 2348.03 -complete, 100, peak_memory_usage: 2486954448 , MB = 2371.74 -*/ - // clang-format on - - } // end iters + // TODO: remove + { + int constexpr expected[] = {1, 1, 1}; + input_limit_test_read( + __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); + } } TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) From 17096d33c4375594f1d3ad0f04ae570601cf56bc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 4 Mar 2024 13:02:30 -0800 Subject: [PATCH 163/321] Fix format Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 058b8555378..b294efbbad6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -287,9 +287,7 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - ORC_TEST - io/orc_chunked_reader_test.cu - io/orc_test.cpp + ORC_TEST io/orc_chunked_reader_test.cu io/orc_test.cpp GPUS 1 PERCENT 30 ) From 18a4e9ff27d412b0b9a3b7f5c3753bb146897e88 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 08:07:53 -0800 Subject: [PATCH 164/321] Temporarily fix use-after-free bug Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 50 +++++++++++++------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 13f0311ab1c..d1fa977d80f 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -112,34 +112,38 @@ auto chunked_read(std::string const& filepath, input_limit input_limit_bytes = input_limit{0}, output_row_granularity output_granularity = output_row_granularity{10'000}) { - auto const read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = cudf::io::chunked_orc_reader(static_cast(output_limit_bytes), - static_cast(input_limit_bytes), - static_cast(output_granularity), - read_opts); - auto num_chunks = 0; auto out_tables = std::vector>{}; + auto out_tviews = std::vector{}; - do { - auto chunk = reader.read_chunk(); - // If the input file is empty, the first call to `read_chunk` will return an empty table. - // Thus, we only check for non-empty output table from the second call. - if (num_chunks > 0) { - CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); + // TODO: remove this scope, when we get rid of mem stat in the reader. + // This is to avoid use-after-free of memory resource created by the mem stat object. 
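A minimal sketch of the destruction-order hazard this scope guards against, using only the rmm calls already appearing in this series (the function below is illustrative, not the test's own code):

#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

void lifetime_hazard_sketch()
{
  auto* upstream = rmm::mr::get_current_device_resource();
  {
    // The reader's memory_stats_logger installs an adaptor much like this one.
    rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> stats_mr{upstream};
    rmm::mr::set_current_device_resource(&stats_mr);
    // Every rmm allocation made here is owned by stats_mr.
    rmm::mr::set_current_device_resource(upstream);
  }
  // A buffer allocated inside the block but freed out here would be returned
  // through the already-destroyed adaptor: the use-after-free this test scope avoids.
}

The later "Final workaround" commit attacks the same problem from the reader's side, keeping the adaptor in a static inline std::unique_ptr so that it outlives any buffer it handed out.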
+ { + auto const read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_orc_reader(static_cast(output_limit_bytes), + static_cast(input_limit_bytes), + static_cast(output_granularity), + read_opts); + + do { + auto chunk = reader.read_chunk(); + // If the input file is empty, the first call to `read_chunk` will return an empty table. + // Thus, we only check for non-empty output table from the second call. + if (num_chunks > 0) { + CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); + } + ++num_chunks; + out_tables.emplace_back(std::move(chunk.tbl)); + } while (reader.has_next()); + + if (num_chunks > 1) { + CUDF_EXPECTS(out_tables.front()->num_rows() != 0, "Number of rows in the new chunk is zero."); } - ++num_chunks; - out_tables.emplace_back(std::move(chunk.tbl)); - } while (reader.has_next()); - - if (num_chunks > 1) { - CUDF_EXPECTS(out_tables.front()->num_rows() != 0, "Number of rows in the new chunk is zero."); - } - auto out_tviews = std::vector{}; - for (auto const& tbl : out_tables) { - out_tviews.emplace_back(tbl->view()); + for (auto const& tbl : out_tables) { + out_tviews.emplace_back(tbl->view()); + } } return std::pair(cudf::concatenate(out_tviews), num_chunks); From 969781368e6c4ede43f32bf1edaadd4dab2d8014 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 08:12:24 -0800 Subject: [PATCH 165/321] Revert "Temporarily fix use-after-free bug" This reverts commit 18a4e9ff27d412b0b9a3b7f5c3753bb146897e88. --- cpp/tests/io/orc_chunked_reader_test.cu | 50 ++++++++++++------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index d1fa977d80f..13f0311ab1c 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -112,38 +112,34 @@ auto chunked_read(std::string const& filepath, input_limit input_limit_bytes = input_limit{0}, output_row_granularity output_granularity = output_row_granularity{10'000}) { + auto const read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_orc_reader(static_cast(output_limit_bytes), + static_cast(input_limit_bytes), + static_cast(output_granularity), + read_opts); + auto num_chunks = 0; auto out_tables = std::vector>{}; - auto out_tviews = std::vector{}; - // TODO: remove this scope, when we get rid of mem stat in the reader. - // This is to avoid use-after-free of memory resource created by the mem stat object. - { - auto const read_opts = - cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = cudf::io::chunked_orc_reader(static_cast(output_limit_bytes), - static_cast(input_limit_bytes), - static_cast(output_granularity), - read_opts); - - do { - auto chunk = reader.read_chunk(); - // If the input file is empty, the first call to `read_chunk` will return an empty table. - // Thus, we only check for non-empty output table from the second call. 
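For orientation, these are the three knobs that `chunked_read` forwards to the reader, with the semantics these tests rely on (zero means unlimited, and the granularity defaults to 10'000 rows), in a condensed usage sketch:

auto const opts = cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).build();
auto reader     = cudf::io::chunked_orc_reader(
  output_limit_bytes,      // cap on the decoded size of each table returned by read_chunk()
  input_limit_bytes,       // cap on device memory used while loading/decompressing stripes
  output_row_granularity,  // row-count unit used when slicing decoded data into output chunks
  opts);
do {
  auto chunk = reader.read_chunk();  // next slice of the file, as a table_with_metadata
  // ... consume chunk.tbl ...
} while (reader.has_next());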
- if (num_chunks > 0) { - CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); - } - ++num_chunks; - out_tables.emplace_back(std::move(chunk.tbl)); - } while (reader.has_next()); - - if (num_chunks > 1) { - CUDF_EXPECTS(out_tables.front()->num_rows() != 0, "Number of rows in the new chunk is zero."); + do { + auto chunk = reader.read_chunk(); + // If the input file is empty, the first call to `read_chunk` will return an empty table. + // Thus, we only check for non-empty output table from the second call. + if (num_chunks > 0) { + CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); } + ++num_chunks; + out_tables.emplace_back(std::move(chunk.tbl)); + } while (reader.has_next()); - for (auto const& tbl : out_tables) { - out_tviews.emplace_back(tbl->view()); - } + if (num_chunks > 1) { + CUDF_EXPECTS(out_tables.front()->num_rows() != 0, "Number of rows in the new chunk is zero."); + } + + auto out_tviews = std::vector{}; + for (auto const& tbl : out_tables) { + out_tviews.emplace_back(tbl->view()); } return std::pair(cudf::concatenate(out_tviews), num_chunks); From 001693577dc8205bd51c2ecc71223be0e0593fee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 08:12:45 -0800 Subject: [PATCH 166/321] This is indeed the fix for use-after-free bug Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 13f0311ab1c..2857b82d415 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -122,6 +122,10 @@ auto chunked_read(std::string const& filepath, auto num_chunks = 0; auto out_tables = std::vector>{}; + // TODO: remove this scope, when we get rid of mem stat in the reader. + // This is to avoid use-after-free of memory resource created by the mem stat object. + auto mr = rmm::mr::get_current_device_resource(); + do { auto chunk = reader.read_chunk(); // If the input file is empty, the first call to `read_chunk` will return an empty table. @@ -142,7 +146,10 @@ auto chunked_read(std::string const& filepath, out_tviews.emplace_back(tbl->view()); } - return std::pair(cudf::concatenate(out_tviews), num_chunks); + // return std::pair(cudf::concatenate(out_tviews), num_chunks); + + // TODO: remove this + return std::pair(cudf::concatenate(out_tviews, cudf::get_default_stream(), mr), num_chunks); } auto chunked_read(std::string const& filepath, From 759246d81c15106bbd67f149a4e309bdd94a4196 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 13:21:15 -0800 Subject: [PATCH 167/321] Final workaround for use-after-free bug Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/reader_impl.hpp | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 1705369e7dd..5f2a140ed4b 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1631,6 +1631,7 @@ reader::impl::impl(std::size_t output_size_limit, rmm::mr::device_memory_resource* mr) : _stream(stream), _mr(mr), + mem_stats_logger(mr), _config{options.get_timestamp_type(), options.is_enabled_use_index(), options.is_enabled_use_np_dtypes(), @@ -1645,8 +1646,7 @@ reader::impl::impl(std::size_t output_size_limit, _chunk_read_data{ output_size_limit, data_read_limit, - output_row_granularity > 0 ? 
output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY}, - mem_stats_logger(mr) + output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} { printf("construct reader, limit = %d, %d, granularity %d \n", diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 4a32394c91f..de1d0ed68f5 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -36,22 +36,29 @@ namespace cudf::io::orc::detail { class memory_stats_logger { public: - explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) - : existing_mr(mr), statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) : existing_mr(mr) { - rmm::mr::set_current_device_resource(&statistics_mr); + printf("existing mr: %p\n", mr); + + statistics_mr = + std::make_unique<rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource>>( + existing_mr); + + rmm::mr::set_current_device_resource(statistics_mr.get()); } ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } [[nodiscard]] size_t peak_memory_usage() const noexcept { - return statistics_mr.get_bytes_counter().peak; + return statistics_mr->get_bytes_counter().peak; } private: rmm::mr::device_memory_resource* existing_mr; - rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> statistics_mr; + static inline std::unique_ptr< + rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource>> + statistics_mr; }; struct reader_column_meta; @@ -188,6 +195,8 @@ class reader::impl { rmm::cuda_stream_view const _stream; rmm::mr::device_memory_resource* const _mr; + memory_stats_logger mem_stats_logger; + // Reader configs struct { data_type timestamp_type; // override output timestamp resolution @@ -213,8 +222,6 @@ class reader::impl { std::vector<std::vector<column_buffer>> _out_buffers; static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000; - - memory_stats_logger mem_stats_logger; }; } // namespace cudf::io::orc::detail From d5912b905fa556db05f7a63d59b328fe937a9dbc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 16:30:38 -0800 Subject: [PATCH 168/321] Split file Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 1 + cpp/src/io/orc/reader_impl.cu | 1389 ------------------------ cpp/src/io/orc/reader_impl_decode.cu | 1451 ++++++++++++++++++++++++++ 3 files changed, 1452 insertions(+), 1389 deletions(-) create mode 100644 cpp/src/io/orc/reader_impl_decode.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb5360509d7..fc9854ebf7c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -389,6 +389,7 @@ add_library( src/io/orc/reader.cu src/io/orc/reader_impl.cu src/io/orc/reader_impl_chunking.cu + src/io/orc/reader_impl_decode.cu src/io/orc/reader_impl_helpers.cpp src/io/orc/stats_enc.cu src/io/orc/stripe_data.cu diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 5f2a140ed4b..d4ddbea347c 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -59,1395 +59,6 @@ namespace cudf::io::orc::detail { -namespace { - -// TODO: update -// TODO: compute num stripes from chunks -/** - * @brief Decompresses the stripe data, at stream granularity.
- * - * @param decompressor Block decompressor - * @param stripe_data List of source stripe column data - * @param stream_info List of stream to column mappings - * @param chunks Vector of list of column chunk descriptors - * @param row_groups Vector of list of row index descriptors - * @param num_stripes Number of stripes making up column chunks - * @param row_index_stride Distance between each row index - * @param use_base_stride Whether to use base stride obtained from meta or use the computed value - * @param stream CUDA stream used for device memory operations and kernel launches - * @return Device buffer to decompressed page data - */ -rmm::device_buffer decompress_stripe_data( - chunk const& load_stripe_chunk, - chunk const& stripe_chunk, - stream_id_map const& compinfo_map, - OrcDecompressor const& decompressor, - host_span stripe_data, - host_span stream_info, - cudf::detail::hostdevice_2dvector& chunks, - cudf::detail::hostdevice_2dvector& row_groups, - size_type num_stripes, - size_type row_index_stride, - bool use_base_stride, - rmm::cuda_stream_view stream) -{ - // Count the exact number of compressed blocks - std::size_t num_compressed_blocks = 0; - std::size_t num_uncompressed_blocks = 0; - std::size_t total_decomp_size = 0; - - // printf("decompress #stripe: %d, ") - - // TODO: use lvl_stripe_stream_chunks - std::size_t count{0}; - for (auto const& info : stream_info) { - if (info.id.stripe_idx < stripe_chunk.start_idx || - info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { - continue; - } - count++; - } - - cudf::detail::hostdevice_vector compinfo(0, count, stream); - - for (auto const& info : stream_info) { - if (info.id.stripe_idx < stripe_chunk.start_idx || - info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { - continue; - } - -#ifdef PRINT_DEBUG - printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_cold_idx, - (int)info.id.kind, - info.dst_pos, - info.length); - fflush(stdout); -#endif - - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast( - stripe_data[info.id.stripe_idx - load_stripe_chunk.start_idx].data()) + - info.dst_pos, - info.length)); - - // printf("line %d\n", __LINE__); - // fflush(stdout); - auto const& cached_comp_info = compinfo_map.at( - stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}); - // printf("line %d\n", __LINE__); - // fflush(stdout); - // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, - // info.id.kind}]; - auto& stream_comp_info = compinfo.back(); - stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; - stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; - stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; - - num_compressed_blocks += cached_comp_info.num_compressed_blocks; - num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; - total_decomp_size += cached_comp_info.total_decomp_size; - } - - CUDF_EXPECTS( - not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)), - "Inconsistent info on compression blocks"); - -#ifdef XXX - std::size_t old_num_compressed_blocks = num_compressed_blocks; - std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks; - std::size_t old_total_decomp_size = total_decomp_size; - - num_compressed_blocks = 0; - num_uncompressed_blocks = 
0; - total_decomp_size = 0; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - num_compressed_blocks += compinfo[i].num_compressed_blocks; - num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; - total_decomp_size += compinfo[i].max_uncompressed_size; - - auto const& info = stream_info[i]; - printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_cold_idx, - (int)info.id.kind, - (size_t)compinfo[i].num_compressed_blocks, - (size_t)compinfo[i].num_uncompressed_blocks, - compinfo[i].max_uncompressed_size); - fflush(stdout); - } - - if (old_num_compressed_blocks != num_compressed_blocks || - old_num_uncompressed_blocks != num_uncompressed_blocks || - old_total_decomp_size != total_decomp_size) { - printf("invalid: %d - %d, %d - %d, %d - %d\n", - (int)old_num_compressed_blocks, - (int)num_compressed_blocks, - (int)old_num_uncompressed_blocks, - (int)num_uncompressed_blocks, - (int)old_total_decomp_size, - (int)total_decomp_size - - ); - } -#endif - - // Buffer needs to be padded. - // Required by `gpuDecodeOrcColumnData`. - rmm::device_buffer decomp_data( - cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); - if (decomp_data.is_empty()) { return decomp_data; } - - rmm::device_uvector> inflate_in( - num_compressed_blocks + num_uncompressed_blocks, stream); - rmm::device_uvector> inflate_out( - num_compressed_blocks + num_uncompressed_blocks, stream); - rmm::device_uvector inflate_res(num_compressed_blocks, stream); - thrust::fill(rmm::exec_policy(stream), - inflate_res.begin(), - inflate_res.end(), - compression_result{0, compression_status::FAILURE}); - - // Parse again to populate the decompression input/output buffers - std::size_t decomp_offset = 0; - uint32_t max_uncomp_block_size = 0; - uint32_t start_pos = 0; - auto start_pos_uncomp = (uint32_t)num_compressed_blocks; - for (std::size_t i = 0; i < compinfo.size(); ++i) { - auto dst_base = static_cast(decomp_data.data()); - compinfo[i].uncompressed_data = dst_base + decomp_offset; - compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; - compinfo[i].dec_out_ctl = inflate_out.data() + start_pos; - compinfo[i].dec_res = {inflate_res.data() + start_pos, compinfo[i].num_compressed_blocks}; - compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; - compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; - - // stream_info[i].dst_pos = decomp_offset; - decomp_offset += compinfo[i].max_uncompressed_size; - start_pos += compinfo[i].num_compressed_blocks; - start_pos_uncomp += compinfo[i].num_uncompressed_blocks; - max_uncomp_block_size = - std::max(max_uncomp_block_size, compinfo[i].max_uncompressed_block_size); - } - compinfo.host_to_device_async(stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), - compinfo.size(), - decompressor.GetBlockSize(), - decompressor.GetLog2MaxCompressionRatio(), - stream); - - // Value for checking whether we decompress successfully. - // It doesn't need to be atomic as there is no race condition: we only write `true` if needed. 
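- // (More precisely: many device threads may store to this flag concurrently, but every store writes the same value `true` and nothing ever writes `false` back, so the final value is deterministic even without atomics.)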
- cudf::detail::hostdevice_vector any_block_failure(1, stream); - any_block_failure[0] = false; - any_block_failure.host_to_device_async(stream); - - // Dispatch batches of blocks to decompress - if (num_compressed_blocks > 0) { - device_span> inflate_in_view{inflate_in.data(), - num_compressed_blocks}; - device_span> inflate_out_view{inflate_out.data(), num_compressed_blocks}; - switch (decompressor.compression()) { - case compression_type::ZLIB: - if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) { - gpuinflate( - inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream); - } else { - nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - } - break; - case compression_type::SNAPPY: - if (nvcomp::is_decompression_disabled(nvcomp::compression_type::SNAPPY)) { - gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream); - } else { - nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - } - break; - case compression_type::ZSTD: - if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD); - reason) { - CUDF_FAIL("Decompression error: " + reason.value()); - } - nvcomp::batched_decompress(nvcomp::compression_type::ZSTD, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - break; - case compression_type::LZ4: - if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::LZ4); - reason) { - CUDF_FAIL("Decompression error: " + reason.value()); - } - nvcomp::batched_decompress(nvcomp::compression_type::LZ4, - inflate_in_view, - inflate_out_view, - inflate_res, - max_uncomp_block_size, - total_decomp_size, - stream); - break; - default: CUDF_FAIL("Unexpected decompression dispatch"); break; - } - - // TODO: proclam return type - - // Check if any block has been failed to decompress. - // Not using `thrust::any` or `thrust::count_if` to defer stream sync. - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(std::size_t{0}), - thrust::make_counting_iterator(inflate_res.size()), - [results = inflate_res.begin(), - any_block_failure = any_block_failure.device_ptr()] __device__(auto const idx) { - if (results[idx].status != compression_status::SUCCESS) { *any_block_failure = true; } - }); - } - - if (num_uncompressed_blocks > 0) { - device_span> copy_in_view{inflate_in.data() + num_compressed_blocks, - num_uncompressed_blocks}; - device_span> copy_out_view{inflate_out.data() + num_compressed_blocks, - num_uncompressed_blocks}; - gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); - } - - // Copy without stream sync, thus need to wait for stream sync below to access. - any_block_failure.device_to_host_async(stream); - - gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream); - compinfo.device_to_host_sync(stream); // This also sync stream for `any_block_failure`. 
- - // We can check on host after stream synchronize - CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); - - auto const num_columns = static_cast(chunks.size().second); - - // Update the stream information with the updated uncompressed info - // TBD: We could update the value from the information we already - // have in stream_info[], but using the gpu results also updates - // max_uncompressed_size to the actual uncompressed size, or zero if - // decompression failed. - for (size_type i = 0; i < num_stripes; ++i) { - for (size_type j = 0; j < num_columns; ++j) { - auto& chunk = chunks[i][j]; - for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { - if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { - chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; - chunk.strm_len[k] = compinfo[chunk.strm_id[k]].max_uncompressed_size; - } - } - } - } - - if (row_groups.size().first) { - chunks.host_to_device_async(stream); - row_groups.host_to_device_async(stream); - gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), - compinfo.device_ptr(), - chunks.base_device_ptr(), - num_columns, - num_stripes, - row_index_stride, - use_base_stride, - stream); - } - - return decomp_data; -} - -/** - * @brief Updates null mask of columns whose parent is a struct column. - * - * If struct column has null element, that row would be skipped while writing child column in ORC, - * so we need to insert the missing null elements in child column. There is another behavior from - * pyspark, where if the child column doesn't have any null elements, it will not have present - * stream, so in that case parent null mask need to be copied to child column. - * - * @param chunks Vector of list of column chunk descriptors - * @param out_buffers Output columns' device buffers - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to use for device memory allocation - */ -void update_null_mask(cudf::detail::hostdevice_2dvector& chunks, - host_span out_buffers, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_stripes = chunks.size().first; - auto const num_columns = chunks.size().second; - bool is_mask_updated = false; - - for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { - if (chunks[0][col_idx].parent_validity_info.valid_map_base != nullptr) { - if (not is_mask_updated) { - chunks.device_to_host_sync(stream); - is_mask_updated = true; - } - - auto parent_valid_map_base = chunks[0][col_idx].parent_validity_info.valid_map_base; - auto child_valid_map_base = out_buffers[col_idx].null_mask(); - auto child_mask_len = - chunks[0][col_idx].column_num_rows - chunks[0][col_idx].parent_validity_info.null_count; - auto parent_mask_len = chunks[0][col_idx].column_num_rows; - - if (child_valid_map_base != nullptr) { - rmm::device_uvector dst_idx(child_mask_len, stream); - // Copy indexes at which the parent has valid value. 
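- // (Below, the child's validity bits are then scattered onto exactly those positions of an all-null, parent-length mask, so child rows that the writer skipped because the parent row was null stay null in the merged mask.)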
- thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + parent_mask_len, - dst_idx.begin(), - [parent_valid_map_base] __device__(auto idx) { - return bit_is_set(parent_valid_map_base, idx); - }); - - auto merged_null_mask = cudf::detail::create_null_mask( - parent_mask_len, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); - auto merged_mask = static_cast(merged_null_mask.data()); - uint32_t* dst_idx_ptr = dst_idx.data(); - // Copy child valid bits from child column to valid indexes, this will merge both child - // and parent null masks - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + dst_idx.size(), - [child_valid_map_base, dst_idx_ptr, merged_mask] __device__(auto idx) { - if (bit_is_set(child_valid_map_base, idx)) { - cudf::set_bit(merged_mask, dst_idx_ptr[idx]); - }; - }); - - out_buffers[col_idx].set_null_mask(std::move(merged_null_mask)); - - } else { - // Since child column doesn't have a mask, copy parent null mask - auto mask_size = bitmask_allocation_size_bytes(parent_mask_len); - out_buffers[col_idx].set_null_mask( - rmm::device_buffer(static_cast(parent_valid_map_base), mask_size, stream, mr)); - } - } - } - - if (is_mask_updated) { - // Update chunks with pointers to column data which might have been changed. - for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { - for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { - auto& chunk = chunks[stripe_idx][col_idx]; - chunk.valid_map_base = out_buffers[col_idx].null_mask(); - } - } - chunks.host_to_device_sync(stream); - } -} - -/** - * @brief Converts the stripe column data and outputs to columns. - * - * @param num_dicts Number of dictionary entries required - * @param skip_rows Number of rows to offset from start - * @param row_index_stride Distance between each row index - * @param level Current nesting level being processed - * @param tz_table Local time to UTC conversion table - * @param chunks Vector of list of column chunk descriptors - * @param row_groups Vector of list of row index descriptors - * @param out_buffers Output columns' device buffers - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation - */ -void decode_stream_data(std::size_t num_dicts, - int64_t skip_rows, - size_type row_index_stride, - std::size_t level, - table_view const& tz_table, - cudf::detail::hostdevice_2dvector& chunks, - cudf::detail::device_2dspan row_groups, - std::vector& out_buffers, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_stripes = chunks.size().first; - auto const num_columns = chunks.size().second; - printf("decode %d stripess \n", (int)num_stripes); - - thrust::counting_iterator col_idx_it(0); - thrust::counting_iterator stripe_idx_it(0); - - // Update chunks with pointers to column data - std::for_each(stripe_idx_it, stripe_idx_it + num_stripes, [&](auto stripe_idx) { - std::for_each(col_idx_it, col_idx_it + num_columns, [&](auto col_idx) { - auto& chunk = chunks[stripe_idx][col_idx]; - chunk.column_data_base = out_buffers[col_idx].data(); - chunk.valid_map_base = out_buffers[col_idx].null_mask(); - }); - }); - - // Allocate global dictionary for deserializing - rmm::device_uvector global_dict(num_dicts, stream); - - chunks.host_to_device_sync(stream); - gpu::DecodeNullsAndStringDictionaries( - 
chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream); - - if (level > 0) { - printf("update_null_mask\n"); - // Update nullmasks for children if parent was a struct and had null mask - update_null_mask(chunks, out_buffers, stream, mr); - } - - auto const tz_table_dptr = table_device_view::create(tz_table, stream); - rmm::device_scalar error_count(0, stream); - // Update the null map for child columns - - // printf( - // "num col: %d, num stripe: %d, skip row: %d, row_groups size: %d, row index stride: %d, " - // "level: " - // "%d\n", - // (int)num_columns, - // (int)num_stripes, - // (int)skip_rows, - // (int)row_groups.size().first, - // (int)row_index_stride, - // (int)level - // ); - - gpu::DecodeOrcColumnData(chunks.base_device_ptr(), - global_dict.data(), - row_groups, - num_columns, - num_stripes, - skip_rows, - *tz_table_dptr, - row_groups.size().first, - row_index_stride, - level, - error_count.data(), - stream); - chunks.device_to_host_async(stream); - // `value` synchronizes - auto const num_errors = error_count.value(stream); - CUDF_EXPECTS(num_errors == 0, "ORC data decode failed"); - - std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) { - out_buffers[col_idx].null_count() = - std::accumulate(stripe_idx_it + 0, - stripe_idx_it + num_stripes, - 0, - [&](auto null_count, auto const stripe_idx) { - // printf( - // "null count: %d => %d\n", (int)stripe_idx, - // (int)chunks[stripe_idx][col_idx].null_count); - // printf("num child rows: %d \n", - // (int)chunks[stripe_idx][col_idx].num_child_rows); - - return null_count + chunks[stripe_idx][col_idx].null_count; - }); - }); -} - -/** - * @brief Compute the per-stripe prefix sum of null count, for each struct column in the current - * layer. - */ -void scan_null_counts(cudf::detail::hostdevice_2dvector const& chunks, - cudf::host_span> prefix_sums, - rmm::cuda_stream_view stream) -{ - auto const num_stripes = chunks.size().first; - if (num_stripes == 0) return; - - auto const num_columns = chunks.size().second; - std::vector>> prefix_sums_to_update; - for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) { - // Null counts sums are only needed for children of struct columns - if (chunks[0][col_idx].type_kind == STRUCT) { - prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); - } - } - auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( - prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); - - thrust::for_each(rmm::exec_policy(stream), - d_prefix_sums_to_update.begin(), - d_prefix_sums_to_update.end(), - [chunks = cudf::detail::device_2dspan{chunks}] __device__( - auto const& idx_psums) { - auto const col_idx = idx_psums.first; - auto const psums = idx_psums.second; - - thrust::transform( - thrust::seq, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + psums.size(), - psums.begin(), - [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; }); - - thrust::inclusive_scan(thrust::seq, psums.begin(), psums.end(), psums.begin()); - }); - // `prefix_sums_to_update` goes out of scope, copy has to be done before we return - stream.synchronize(); -} - -// TODO: this is called for each chunk of stripes. -/** - * @brief Aggregate child metadata from parent column chunks. 
- */ -void aggregate_child_meta(std::size_t stripe_start, - std::size_t level, - cudf::io::orc::detail::column_hierarchy const& selected_columns, - cudf::detail::host_2dspan chunks, - cudf::detail::host_2dspan row_groups, - host_span nested_cols, - host_span out_buffers, - reader_column_meta& col_meta) -{ - auto const num_of_stripes = chunks.size().first; - auto const num_of_rowgroups = row_groups.size().first; - auto const num_child_cols = selected_columns.levels[level + 1].size(); - auto const number_of_child_chunks = num_child_cols * num_of_stripes; - auto& num_child_rows = col_meta.num_child_rows; - auto& parent_column_data = col_meta.parent_column_data; - - // Reset the meta to store child column details. - num_child_rows.resize(selected_columns.levels[level + 1].size()); - std::fill(num_child_rows.begin(), num_child_rows.end(), 0); - parent_column_data.resize(number_of_child_chunks); - col_meta.parent_column_index.resize(number_of_child_chunks); - col_meta.child_start_row.resize(number_of_child_chunks); - col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks); - col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols); - - auto child_start_row = cudf::detail::host_2dspan( - col_meta.child_start_row.data(), num_of_stripes, num_child_cols); - auto num_child_rows_per_stripe = cudf::detail::host_2dspan( - col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols); - auto rwgrp_meta = cudf::detail::host_2dspan( - col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols); - - int index = 0; // number of child column processed - - printf("\n\n"); - // For each parent column, update its child column meta for each stripe. - std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { - // printf("p_col.id: %d\n", (int)p_col.id); - - auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; - // printf(" level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx); - - int64_t start_row = 0; - auto processed_row_groups = 0; - - for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { - // Aggregate num_rows and start_row from processed parent columns per row groups - if (num_of_rowgroups) { - // printf(" num_of_rowgroups: %d\n", (int)num_of_rowgroups); - - auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; - auto processed_child_rows = 0; - - for (std::size_t rowgroup_id = 0; rowgroup_id < stripe_num_row_groups; - rowgroup_id++, processed_row_groups++) { - auto const child_rows = row_groups[processed_row_groups][parent_col_idx].num_child_rows; - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - rwgrp_meta[processed_row_groups][child_col_idx].start_row = processed_child_rows; - rwgrp_meta[processed_row_groups][child_col_idx].num_rows = child_rows; - } - processed_child_rows += child_rows; - } - } - - // Aggregate start row, number of rows per chunk and total number of rows in a column - auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; - // printf(" stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows); - // printf(" p_col.num_children: %d\n", (int)p_col.num_children); - - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - - // TODO: Check for overflow here. 
- num_child_rows[child_col_idx] += child_rows; - num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; - // start row could be different for each column when there is nesting at each stripe level - child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row; - // printf("update child_start_row (%d, %d): %d\n", - // (int)stripe_id, - // (int)child_col_idx, - // (int)start_row); - } - start_row += child_rows; - // printf(" start_row: %d\n", (int)start_row); - } - - // Parent column null mask and null count would be required for child column - // to adjust its nullmask. - auto type = out_buffers[parent_col_idx].type.id(); - auto parent_null_count = static_cast(out_buffers[parent_col_idx].null_count()); - auto parent_valid_map = out_buffers[parent_col_idx].null_mask(); - auto num_rows = out_buffers[parent_col_idx].size; - - for (size_type id = 0; id < p_col.num_children; id++) { - auto const child_col_idx = index + id; - col_meta.parent_column_index[child_col_idx] = parent_col_idx; - if (type == type_id::STRUCT) { - parent_column_data[child_col_idx] = {parent_valid_map, parent_null_count}; - // Number of rows in child will remain same as parent in case of struct column - num_child_rows[child_col_idx] = num_rows; - } else { - parent_column_data[child_col_idx] = {nullptr, 0}; - } - } - index += p_col.num_children; - }); -} - -/** - * @brief struct to store buffer data and size of list buffer - */ -struct list_buffer_data { - size_type* data; - size_type size; -}; - -// Generates offsets for list buffer from number of elements in a row. -void generate_offsets_for_list(host_span buff_data, rmm::cuda_stream_view stream) -{ - for (auto& list_data : buff_data) { - thrust::exclusive_scan(rmm::exec_policy_nosync(stream), - list_data.data, - list_data.data + list_data.size, - list_data.data); - } -} - -/** - * @brief TODO - * @param input - * @param size_limit - * @param stream - * @return - */ -std::vector find_table_splits(table_view const& input, - size_type segment_length, - std::size_t size_limit, - rmm::cuda_stream_view stream) -{ - printf("find table split, seg length = %d, limit = %d \n", segment_length, (int)size_limit); - - // If segment_length is zero: we don't have any limit on granularity. - // As such, set segment length to the number of rows. - if (segment_length == 0) { segment_length = input.num_rows(); } - - // If we have small number of rows, need to adjust segment_length before calling to - // `segmented_row_bit_count`. - segment_length = std::min(segment_length, input.num_rows()); - - // Default 10k rows. - auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( - input, segment_length, stream, rmm::mr::get_current_device_resource()); - - auto segmented_sizes = - cudf::detail::hostdevice_vector(d_segmented_sizes->size(), stream); - - // TODO: exec_policy_nosync - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(d_segmented_sizes->size()), - segmented_sizes.d_begin(), - [segment_length, - num_rows = input.num_rows(), - d_sizes = d_segmented_sizes->view().begin()] __device__(auto const segment_idx) { - // Since the number of rows may not divisible by segment_length, - // the last segment may be shorter than the others. 
- auto const current_length = - cuda::std::min(segment_length, num_rows - segment_length * segment_idx); - auto const size = d_sizes[segment_idx]; - return cumulative_size{current_length, static_cast(size)}; - }); - - // TODO: remove: - segmented_sizes.device_to_host_sync(stream); - printf("total row sizes by segment = %d:\n", (int)segment_length); - for (auto& size : segmented_sizes) { - printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT); - } - - // TODO: exec_policy_nosync - thrust::inclusive_scan(rmm::exec_policy(stream), - segmented_sizes.d_begin(), - segmented_sizes.d_end(), - segmented_sizes.d_begin(), - cumulative_size_sum{}); - segmented_sizes.device_to_host_sync(stream); - - // Since the segment sizes are in bits, we need to multiply CHAR_BIT with the output limit. - return find_splits(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT); -} - -} // namespace - -// TODO: this should be called per chunk of stripes. -void reader::impl::decompress_and_decode() -{ - if (_file_itm_data.has_no_data()) { return; } - - auto const stripe_chunk = - _chunk_read_data.decode_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++]; - auto const stripe_start = stripe_chunk.start_idx; - auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; - - auto const load_stripe_start = - _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1].start_idx; - - printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); - - auto const rows_to_skip = _file_itm_data.rows_to_skip; - // auto const rows_to_read = _file_itm_data.rows_to_read; - auto const& selected_stripes = _file_itm_data.selected_stripes; - - // auto const rows_to_skip = 0; - auto rows_to_read = 0; - for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - auto const& stripe = selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - // TODO: check overflow - // CUDF_EXPECTS(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows < - // static_cast(std::numeric_limits::max()), - // "TODO"); - rows_to_read += static_cast(stripe_info->numberOfRows); - - if (_file_itm_data.rows_to_skip > 0) { - CUDF_EXPECTS(_file_itm_data.rows_to_skip < static_cast(stripe_info->numberOfRows), - "TODO"); - } - } - rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); - _file_itm_data.rows_to_skip = 0; - - // Set up table for converting timestamp columns from local to UTC time - auto const tz_table = [&, &selected_stripes = selected_stripes] { - auto const has_timestamp_column = std::any_of( - _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { - return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { - return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; - }); - }); - - return has_timestamp_column ? 
cudf::detail::make_timezone_transition_table( - {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) - : std::make_unique(); - }(); - - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; - auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - - null_count_prefix_sums.clear(); - - // TODO: move this to global step - lvl_chunks.resize(_selected_columns.num_levels()); - _out_buffers.clear(); - _out_buffers.resize(_selected_columns.num_levels()); - - // - // - // - // TODO: move this to reader_impl.cu, decomp and decode step - // std::size_t num_stripes = selected_stripes.size(); - std::size_t num_stripes = stripe_chunk.count; - - // Iterates through levels of nested columns, child column will be one level down - // compared to parent column. - auto& col_meta = *_col_meta; - -#if 0 - printf("num_child_rows: (size %d)\n", (int)_col_meta->num_child_rows.size()); - if (_col_meta->num_child_rows.size()) { - for (auto x : _col_meta->num_child_rows) { - printf("%d, ", (int)x); - } - printf("\n"); - - _col_meta->num_child_rows.clear(); - } - - printf("parent_column_data null count: (size %d)\n", (int)_col_meta->parent_column_data.size()); - if (_col_meta->parent_column_data.size()) { - for (auto x : _col_meta->parent_column_data) { - printf("%d, ", (int)x.null_count); - } - printf("\n"); - _col_meta->parent_column_data.clear(); - } - - printf("parent_column_index: (size %d)\n", (int)_col_meta->parent_column_index.size()); - if (_col_meta->parent_column_index.size()) { - for (auto x : _col_meta->parent_column_index) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->parent_column_index.clear(); - } - - printf("child_start_row: (size %d)\n", (int)_col_meta->child_start_row.size()); - if (_col_meta->child_start_row.size()) { - for (auto x : _col_meta->child_start_row) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->child_start_row.clear(); - } - - printf("num_child_rows_per_stripe: (size %d)\n", - (int)_col_meta->num_child_rows_per_stripe.size()); - if (_col_meta->num_child_rows_per_stripe.size()) { - for (auto x : _col_meta->num_child_rows_per_stripe) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->num_child_rows_per_stripe.clear(); - } - - printf("rwgrp_meta: (size %d)\n", (int)_col_meta->rwgrp_meta.size()); - if (_col_meta->rwgrp_meta.size()) { - for (auto x : _col_meta->rwgrp_meta) { - printf("(%d | %d), ", (int)x.start_row, (int)x.num_rows); - } - printf("\n"); - } - -#endif - - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; - - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - printf("processing level = %d\n", (int)level); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); - - auto& columns_level = _selected_columns.levels[level]; - - // TODO: do it in global step - // Association between each ORC column and its cudf::column - std::vector nested_cols; - - // Get a list of column data types - std::vector column_types; - for (auto& col : columns_level) { - auto col_type = - to_cudf_type(_metadata.get_col_type(col.id).kind, - _config.use_np_dtypes, - 
_config.timestamp_type.id(), - to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); - CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or - col_type == type_id::DECIMAL128) { - // sign of the scale is changed since cuDF follows c++ libraries like CNL - // which uses negative scaling, but liborc and other libraries - // follow positive scaling. - auto const scale = - -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types.emplace_back(col_type, scale); - } else { - column_types.emplace_back(col_type); - } - - // Map each ORC column to its column - if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols.emplace_back(col); - } - } - - auto const num_columns = columns_level.size(); - auto& chunks = lvl_chunks[level]; - chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); - memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - const bool use_index = - _config.use_index && - // Do stripes have row group index - _metadata.is_row_grp_idx_present() && - // Only use if we don't have much work with complete columns & stripes - // TODO: Consider nrows, gpu, and tune the threshold - (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() != 0 && num_columns * num_stripes < 8 * 128) && - // Only use if first row is aligned to a stripe boundary - // TODO: Fix logic to handle unaligned rows - (rows_to_skip == 0); - - printf(" use_index: %d\n", (int)use_index); - - // Logically view streams as columns - auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - num_stripes, _stream, rmm::mr::get_current_device_resource()); - }); - - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; - - int64_t stripe_start_row = 0; - int64_t num_dict_entries = 0; - int64_t num_rowgroups = 0; - - // TODO: Stripe and stream idx must be by chunk. 
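- // (A worked instance of the scale negation earlier in this hunk, phrased against cudf's fixed_point API: ORC metadata stores DECIMAL(9,2) with scale = +2, the reader flips it to -2, and the stored integer 12345 then decodes as numeric::decimal64{numeric::scaled_integer<int64_t>{12345, numeric::scale_type{-2}}}, i.e. 123.45.)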
- // std::size_t stripe_idx = 0; - std::size_t stream_idx = 0; - - for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - // for (auto const& stripe : selected_stripes) { - - printf("processing stripe_idx = %d\n", (int)stripe_idx); - auto const& stripe = selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - auto const stripe_footer = stripe.stripe_footer; - - // printf("stripeinfo->indexLength: %d, data: %d\n", - // (int)stripe_info->indexLength, - // (int)stripe_info->dataLength); - - auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, - level, - stripe_info, - stripe_footer, - col_meta.orc_col_map[level], - _metadata.get_types(), - use_index, - level == 0, - &num_dict_entries, - &stream_idx, - std::nullopt, // stream_info - &chunks); - - auto const is_stripe_data_empty = total_data_size == 0; - printf("is_stripe_data_empty: %d\n", (int)is_stripe_data_empty); - - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, - "Invalid index rowgroup stream data"); - - // TODO: Wrong? - // stripe load_stripe_start? - auto dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data()); - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows); - printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); - - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; - if (use_index) { - stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / - _metadata.get_row_index_stride(); - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - // Update chunks to reference streams pointers - for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; - // start row, number of rows in a each stripe and total number of rows - // may change in lower levels of nesting - chunk.start_row = - (level == 0) - ? stripe_start_row - : col_meta.child_start_row[(stripe_idx - stripe_start) * num_columns + col_idx]; - chunk.num_rows = - (level == 0) - ? static_cast(stripe_info->numberOfRows) - : col_meta - .num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_columns + col_idx]; - printf("col idx: %d, start_row: %d, num rows: %d\n", - (int)col_idx, - (int)chunk.start_row, - (int)chunk.num_rows); - - chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; - chunk.parent_validity_info = - (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; - chunk.parent_null_count_prefix_sums = - (level == 0) - ? nullptr - : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); - chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; - chunk.type_kind = - _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; - - printf("type: %d\n", (int)chunk.type_kind); - - // num_child_rows for a struct column will be same, for other nested types it will be - // calculated. - chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; - chunk.dtype_id = column_types[col_idx].id(); - chunk.decimal_scale = _metadata.per_file_metadata[stripe.source_idx] - .ff.types[columns_level[col_idx].id] - .scale.value_or(0); - - chunk.rowgroup_id = rowgroup_id; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? 
sizeof(string_index_pair) - : ((column_types[col_idx].id() == type_id::LIST) or - (column_types[col_idx].id() == type_id::STRUCT)) - ? sizeof(size_type) - : cudf::size_of(column_types[col_idx]); - chunk.num_rowgroups = stripe_num_rowgroups; - // printf("stripe_num_rowgroups: %d\n", (int)stripe_num_rowgroups); - - if (chunk.type_kind == orc::TIMESTAMP) { - chunk.timestamp_type_id = _config.timestamp_type.id(); - } - if (not is_stripe_data_empty) { - for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stream_begin].dst_pos; - // printf("chunk.streams[%d] of chunk.strm_id[%d], stripe %d | %d, collect from %d\n", - // (int)k, - // (int)chunk.strm_id[k], - // (int)stripe_idx, - // (int)stripe_start, - // (int)(chunk.strm_id[k] + stream_begin)); - } - } - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - stripe_start_row += num_rows_per_stripe; - num_rowgroups += stripe_num_rowgroups; - - // stripe_idx++; - } // for (stripe : selected_stripes) - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - if (stripe_data.empty()) { continue; } - - // Process dataset chunk pages into output columns - auto row_groups = - cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); - if (level > 0 and row_groups.size().first) { - cudf::host_span row_groups_span(row_groups.base_host_ptr(), - num_rowgroups * num_columns); - auto& rw_grp_meta = col_meta.rwgrp_meta; - - // Update start row and num rows per row group - std::transform(rw_grp_meta.begin(), - rw_grp_meta.end(), - row_groups_span.begin(), - rw_grp_meta.begin(), - [&](auto meta, auto& row_grp) { - row_grp.num_rows = meta.num_rows; - row_grp.start_row = meta.start_row; - return meta; - }); - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - // Setup row group descriptors if using indexes - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - // printf("decompress----------------------\n"); - // printf("line %d\n", __LINE__); - // fflush(stdout); - CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - auto decomp_data = decompress_stripe_data( - _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], - stripe_chunk, - _file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - // stripe_data.clear(); - // stripe_data.push_back(std::move(decomp_data)); - - // TODO: only reset each one if the new size/type are different. 
- stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); - for (int64_t i = 1; i < stripe_chunk.count; ++i) { - stripe_data[i + stripe_start - load_stripe_start] = {}; - } - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - } else { - // printf("no decompression----------------------\n"); - - if (row_groups.size().first) { - // printf("line %d\n", __LINE__); - // fflush(stdout); - chunks.host_to_device_async(_stream); - row_groups.host_to_device_async(_stream); - row_groups.host_to_device_async(_stream); - gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), - nullptr, - chunks.base_device_ptr(), - num_columns, - num_stripes, - _metadata.get_row_index_stride(), - level == 0, - _stream); - } - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - // TODO: do not clear but reset each one. - // and only reset if the new size/type are different. - _out_buffers[level].clear(); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - for (std::size_t i = 0; i < column_types.size(); ++i) { - bool is_nullable = false; - for (std::size_t j = 0; j < num_stripes; ++j) { - if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { - printf(" is nullable\n"); - is_nullable = true; - break; - } - } - auto is_list_type = (column_types[i].id() == type_id::LIST); - auto n_rows = (level == 0) ? 
rows_to_read : col_meta.num_child_rows[i]; - - // printf(" create col, num rows: %d\n", (int)n_rows); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - // For list column, offset column will be always size + 1 - if (is_list_type) n_rows++; - _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", buffer size: " << n_rows - << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - decode_stream_data(num_dict_entries, - rows_to_skip, - _metadata.get_row_index_stride(), - level, - tz_table->view(), - chunks, - row_groups, - _out_buffers[level], - _stream, - _mr); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - if (nested_cols.size()) { - printf("have nested col\n"); - - // Extract information to process nested child columns - scan_null_counts(chunks, null_count_prefix_sums[level], _stream); - - row_groups.device_to_host_sync(_stream); - aggregate_child_meta(stripe_start, - level, - _selected_columns, - chunks, - row_groups, - nested_cols, - _out_buffers[level], - col_meta); - - // ORC stores number of elements at each row, so we need to generate offsets from that - std::vector buff_data; - std::for_each( - _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { - if (out_buffer.type.id() == type_id::LIST) { - auto data = static_cast(out_buffer.data()); - buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); - } - }); - - if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - } // end loop level - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - std::vector> out_columns; - _out_metadata = get_meta_with_user_data(); - std::transform( - _selected_columns.levels[0].begin(), - _selected_columns.levels[0].end(), - std::back_inserter(out_columns), - [&](auto const& orc_col_meta) { - _out_metadata.schema_info.emplace_back(""); - auto col_buffer = assemble_buffer( - orc_col_meta.id, 0, *_col_meta, _metadata, _selected_columns, _out_buffers, _stream, _mr); - return make_column(col_buffer, &_out_metadata.schema_info.back(), std::nullopt, _stream); - }); - _chunk_read_data.decoded_table = std::make_unique
(std::move(out_columns)); - - // TODO: do not clear but reset each one. - // and only reset if the new size/type are different. - // This clear is just to check if there is memory leak. - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - _out_buffers[level].clear(); - - auto& stripe_data = lvl_stripe_data[level]; - - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - stripe_data[stripe_start - load_stripe_start] = {}; - } else { - for (int64_t i = 0; i < stripe_chunk.count; ++i) { - stripe_data[i + stripe_start - load_stripe_start] = {}; - } - } - } - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - - // printf("col: \n"); - // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view()); - - // DEBUG only - // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; - - _chunk_read_data.curr_output_table_chunk = 0; - _chunk_read_data.output_table_chunks = - _chunk_read_data.output_size_limit == 0 - ? std::vector{chunk{0, _chunk_read_data.decoded_table->num_rows()}} - : find_table_splits(_chunk_read_data.decoded_table->view(), - _chunk_read_data.output_row_granularity, - _chunk_read_data.output_size_limit, - _stream); - - auto& splits = _chunk_read_data.output_table_chunks; - printf("------------\nSplits decoded table (/total num rows = %d): \n", - (int)_chunk_read_data.decoded_table->num_rows()); - for (size_t idx = 0; idx < splits.size(); idx++) { - printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); - } - fflush(stdout); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -} - void reader::impl::prepare_data(int64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu new file mode 100644 index 00000000000..4971f65debb --- /dev/null +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -0,0 +1,1451 @@ +/* + * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// #define PRINT_DEBUG + +// TODO: remove +#include + +#include +// +// +// +#include "io/comp/gpuinflate.hpp" +#include "io/comp/nvcomp_adapter.hpp" +#include "io/orc/reader_impl.hpp" +#include "io/orc/reader_impl_chunking.hpp" +#include "io/orc/reader_impl_helpers.hpp" +#include "io/utilities/config_utils.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::io::orc::detail { + +namespace { + +// TODO: update +// TODO: compute num stripes from chunks +/** + * @brief Decompresses the stripe data, at stream granularity. + * + * @param decompressor Block decompressor + * @param stripe_data List of source stripe column data + * @param stream_info List of stream to column mappings + * @param chunks Vector of list of column chunk descriptors + * @param row_groups Vector of list of row index descriptors + * @param num_stripes Number of stripes making up column chunks + * @param row_index_stride Distance between each row index + * @param use_base_stride Whether to use base stride obtained from meta or use the computed value + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Device buffer to decompressed page data + */ +rmm::device_buffer decompress_stripe_data( + chunk const& load_stripe_chunk, + chunk const& stripe_chunk, + stream_id_map const& compinfo_map, + OrcDecompressor const& decompressor, + host_span stripe_data, + host_span stream_info, + cudf::detail::hostdevice_2dvector& chunks, + cudf::detail::hostdevice_2dvector& row_groups, + size_type num_stripes, + size_type row_index_stride, + bool use_base_stride, + rmm::cuda_stream_view stream) +{ + // Count the exact number of compressed blocks + std::size_t num_compressed_blocks = 0; + std::size_t num_uncompressed_blocks = 0; + std::size_t total_decomp_size = 0; + + // printf("decompress #stripe: %d, ") + + // TODO: use lvl_stripe_stream_chunks + std::size_t count{0}; + for (auto const& info : stream_info) { + if (info.id.stripe_idx < stripe_chunk.start_idx || + info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + continue; + } + count++; + } + + cudf::detail::hostdevice_vector compinfo(0, count, stream); + + for (auto const& info : stream_info) { + if (info.id.stripe_idx < stripe_chunk.start_idx || + info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + continue; + } + +#ifdef PRINT_DEBUG + printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", + (int)info.id.stripe_idx, + (int)info.id.level, + (int)info.id.orc_cold_idx, + (int)info.id.kind, + info.dst_pos, + info.length); + fflush(stdout); +#endif + + compinfo.push_back(gpu::CompressedStreamInfo( + static_cast( + stripe_data[info.id.stripe_idx - load_stripe_chunk.start_idx].data()) + + info.dst_pos, + info.length)); + + // printf("line %d\n", __LINE__); + // fflush(stdout); + auto const& cached_comp_info = compinfo_map.at( + stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}); + // printf("line %d\n", __LINE__); + // fflush(stdout); + // auto const& cached_comp_info = + // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, + // info.id.kind}]; + auto& stream_comp_info = compinfo.back(); + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; + stream_comp_info.num_uncompressed_blocks = 
cached_comp_info.num_uncompressed_blocks;
+    stream_comp_info.max_uncompressed_size   = cached_comp_info.total_decomp_size;
+
+    num_compressed_blocks += cached_comp_info.num_compressed_blocks;
+    num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks;
+    total_decomp_size += cached_comp_info.total_decomp_size;
+  }
+
+  CUDF_EXPECTS(
+    not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)),
+    "Inconsistent info on compression blocks");
+
+#ifdef XXX
+  std::size_t old_num_compressed_blocks   = num_compressed_blocks;
+  std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks;
+  std::size_t old_total_decomp_size       = total_decomp_size;
+
+  num_compressed_blocks   = 0;
+  num_uncompressed_blocks = 0;
+  total_decomp_size       = 0;
+  for (std::size_t i = 0; i < compinfo.size(); ++i) {
+    num_compressed_blocks += compinfo[i].num_compressed_blocks;
+    num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks;
+    total_decomp_size += compinfo[i].max_uncompressed_size;
+
+    auto const& info = stream_info[i];
+    printf("compute info [%d, %d, %d, %d]:  %lu | %lu | %lu\n",
+           (int)info.id.stripe_idx,
+           (int)info.id.level,
+           (int)info.id.orc_col_idx,
+           (int)info.id.kind,
+           (size_t)compinfo[i].num_compressed_blocks,
+           (size_t)compinfo[i].num_uncompressed_blocks,
+           compinfo[i].max_uncompressed_size);
+    fflush(stdout);
+  }
+
+  if (old_num_compressed_blocks != num_compressed_blocks ||
+      old_num_uncompressed_blocks != num_uncompressed_blocks ||
+      old_total_decomp_size != total_decomp_size) {
+    printf("invalid: %d - %d, %d - %d, %d - %d\n",
+           (int)old_num_compressed_blocks,
+           (int)num_compressed_blocks,
+           (int)old_num_uncompressed_blocks,
+           (int)num_uncompressed_blocks,
+           (int)old_total_decomp_size,
+           (int)total_decomp_size);
+  }
+#endif
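The per-stream block counts consumed above are ultimately derived from the 3-byte header that precedes every compression chunk in an ORC stream: the header value is (length << 1) | is_original, stored little-endian, so a 100,000-byte compressed chunk is announced by the bytes 0x40 0x0D 0x03. A minimal self-contained sketch of that layout follows; the helper name and types are illustrative, not part of this patch.

#include <cstdint>
#include <cstdio>

// Hypothetical helper illustrating the ORC compression chunk header layout.
struct orc_block_header {
  uint32_t length;   // chunk length in bytes (23 bits)
  bool is_original;  // low bit set => chunk is stored uncompressed
};

orc_block_header parse_orc_block_header(uint8_t const* bytes)
{
  // Three bytes, little-endian: value = (length << 1) | is_original.
  uint32_t const raw = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16);
  return {raw >> 1, (raw & 1u) != 0};
}

int main()
{
  uint8_t const header[3] = {0x40, 0x0D, 0x03};  // 100'000-byte compressed chunk
  auto const h            = parse_orc_block_header(header);
  std::printf("length = %u, is_original = %d\n", h.length, static_cast<int>(h.is_original));
  return 0;
}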
+
+  // Buffer needs to be padded.
+  // Required by `gpuDecodeOrcColumnData`.
+  rmm::device_buffer decomp_data(
+    cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream);
+  if (decomp_data.is_empty()) { return decomp_data; }
+
+  rmm::device_uvector<device_span<uint8_t const>> inflate_in(
+    num_compressed_blocks + num_uncompressed_blocks, stream);
+  rmm::device_uvector<device_span<uint8_t>> inflate_out(
+    num_compressed_blocks + num_uncompressed_blocks, stream);
+  rmm::device_uvector<compression_result> inflate_res(num_compressed_blocks, stream);
+  thrust::fill(rmm::exec_policy(stream),
+               inflate_res.begin(),
+               inflate_res.end(),
+               compression_result{0, compression_status::FAILURE});
+
+  // Parse again to populate the decompression input/output buffers
+  std::size_t decomp_offset      = 0;
+  uint32_t max_uncomp_block_size = 0;
+  uint32_t start_pos             = 0;
+  auto start_pos_uncomp          = (uint32_t)num_compressed_blocks;
+  for (std::size_t i = 0; i < compinfo.size(); ++i) {
+    auto dst_base                 = static_cast<uint8_t*>(decomp_data.data());
+    compinfo[i].uncompressed_data = dst_base + decomp_offset;
+    compinfo[i].dec_in_ctl        = inflate_in.data() + start_pos;
+    compinfo[i].dec_out_ctl       = inflate_out.data() + start_pos;
+    compinfo[i].dec_res     = {inflate_res.data() + start_pos, compinfo[i].num_compressed_blocks};
+    compinfo[i].copy_in_ctl  = inflate_in.data() + start_pos_uncomp;
+    compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp;
+
+    // stream_info[i].dst_pos = decomp_offset;
+    decomp_offset += compinfo[i].max_uncompressed_size;
+    start_pos += compinfo[i].num_compressed_blocks;
+    start_pos_uncomp += compinfo[i].num_uncompressed_blocks;
+    max_uncomp_block_size =
+      std::max(max_uncomp_block_size, compinfo[i].max_uncompressed_block_size);
+  }
+  compinfo.host_to_device_async(stream);
+  gpu::ParseCompressedStripeData(compinfo.device_ptr(),
+                                 compinfo.size(),
+                                 decompressor.GetBlockSize(),
+                                 decompressor.GetLog2MaxCompressionRatio(),
+                                 stream);
+
+  // Value for checking whether we decompress successfully.
+  // It doesn't need to be atomic as there is no race condition: we only write `true` if needed.
+  cudf::detail::hostdevice_vector<bool> any_block_failure(1, stream);
+  any_block_failure[0] = false;
+  any_block_failure.host_to_device_async(stream);
+
+  // Dispatch batches of blocks to decompress
+  if (num_compressed_blocks > 0) {
+    device_span<device_span<uint8_t const>> inflate_in_view{inflate_in.data(),
+                                                            num_compressed_blocks};
+    device_span<device_span<uint8_t>> inflate_out_view{inflate_out.data(), num_compressed_blocks};
+    switch (decompressor.compression()) {
+      case compression_type::ZLIB:
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) {
+          gpuinflate(
+            inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream);
+        } else {
+          nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE,
+                                     inflate_in_view,
+                                     inflate_out_view,
+                                     inflate_res,
+                                     max_uncomp_block_size,
+                                     total_decomp_size,
+                                     stream);
+        }
+        break;
+      case compression_type::SNAPPY:
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::SNAPPY)) {
+          gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream);
+        } else {
+          nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
+                                     inflate_in_view,
+                                     inflate_out_view,
+                                     inflate_res,
+                                     max_uncomp_block_size,
+                                     total_decomp_size,
+                                     stream);
+        }
+        break;
+      case compression_type::ZSTD:
+        if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD);
+            reason) {
+          CUDF_FAIL("Decompression error: " + reason.value());
+        }
+        nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
+                                   inflate_in_view,
+                                   inflate_out_view,
+                                   inflate_res,
+                                   max_uncomp_block_size,
+                                   total_decomp_size,
+                                   stream);
+        break;
+      case compression_type::LZ4:
+        if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::LZ4);
+            reason) {
+          CUDF_FAIL("Decompression error: " + reason.value());
+        }
+        nvcomp::batched_decompress(nvcomp::compression_type::LZ4,
+                                   inflate_in_view,
+                                   inflate_out_view,
+                                   inflate_res,
+                                   max_uncomp_block_size,
+                                   total_decomp_size,
+                                   stream);
+        break;
+      default: CUDF_FAIL("Unexpected decompression dispatch"); break;
+    }
+
+    // TODO: proclaim return type
+
+    // Check if any block has failed to decompress.
+    // Not using `thrust::any` or `thrust::count_if` to defer stream sync.
+    thrust::for_each(
+      rmm::exec_policy(stream),
+      thrust::make_counting_iterator(std::size_t{0}),
+      thrust::make_counting_iterator(inflate_res.size()),
+      [results           = inflate_res.begin(),
+       any_block_failure = any_block_failure.device_ptr()] __device__(auto const idx) {
+        if (results[idx].status != compression_status::SUCCESS) { *any_block_failure = true; }
+      });
+  }
+
+  if (num_uncompressed_blocks > 0) {
+    device_span<device_span<uint8_t const>> copy_in_view{inflate_in.data() + num_compressed_blocks,
+                                                         num_uncompressed_blocks};
+    device_span<device_span<uint8_t>> copy_out_view{inflate_out.data() + num_compressed_blocks,
+                                                    num_uncompressed_blocks};
+    gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream);
+  }
+
+  // Copy without stream sync, thus need to wait for stream sync below to access.
+  any_block_failure.device_to_host_async(stream);
+
+  gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream);
+  compinfo.device_to_host_sync(stream);  // This also syncs the stream for `any_block_failure`.
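The `any_block_failure` flag above uses a pattern worth making explicit: many device threads may set the flag, but since every writer stores the same value, no atomics are required, and the single device-to-host copy is deferred until the later synchronization. A minimal standalone sketch of the same idea, with all names illustrative rather than taken from this patch:

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

#include <cstdio>
#include <vector>

int main()
{
  // Pretend per-block decompression statuses; nonzero means failure.
  std::vector<int> h_status{0, 0, 1, 0};
  thrust::device_vector<int> status(h_status.begin(), h_status.end());
  thrust::device_vector<bool> failed(1, false);

  auto const d_status = thrust::raw_pointer_cast(status.data());
  auto const d_failed = thrust::raw_pointer_cast(failed.data());

  // A non-atomic write is safe: every failing thread writes the same value.
  thrust::for_each(thrust::device,
                   thrust::make_counting_iterator(std::size_t{0}),
                   thrust::make_counting_iterator(status.size()),
                   [=] __device__(std::size_t idx) {
                     if (d_status[idx] != 0) { *d_failed = true; }
                   });

  bool const any_failed = failed[0];  // one device-to-host copy, synchronizes here
  std::printf("any_failed = %d\n", static_cast<int>(any_failed));
  return 0;
}

(Compile with nvcc and extended device lambdas enabled, as the surrounding code base already requires.)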
+ + // We can check on host after stream synchronize + CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); + + auto const num_columns = static_cast(chunks.size().second); + + // Update the stream information with the updated uncompressed info + // TBD: We could update the value from the information we already + // have in stream_info[], but using the gpu results also updates + // max_uncompressed_size to the actual uncompressed size, or zero if + // decompression failed. + for (size_type i = 0; i < num_stripes; ++i) { + for (size_type j = 0; j < num_columns; ++j) { + auto& chunk = chunks[i][j]; + for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { + if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { + chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; + chunk.strm_len[k] = compinfo[chunk.strm_id[k]].max_uncompressed_size; + } + } + } + } + + if (row_groups.size().first) { + chunks.host_to_device_async(stream); + row_groups.host_to_device_async(stream); + gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), + compinfo.device_ptr(), + chunks.base_device_ptr(), + num_columns, + num_stripes, + row_index_stride, + use_base_stride, + stream); + } + + return decomp_data; +} + +/** + * @brief Updates null mask of columns whose parent is a struct column. + * + * If struct column has null element, that row would be skipped while writing child column in ORC, + * so we need to insert the missing null elements in child column. There is another behavior from + * pyspark, where if the child column doesn't have any null elements, it will not have present + * stream, so in that case parent null mask need to be copied to child column. + * + * @param chunks Vector of list of column chunk descriptors + * @param out_buffers Output columns' device buffers + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource to use for device memory allocation + */ +void update_null_mask(cudf::detail::hostdevice_2dvector& chunks, + host_span out_buffers, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_stripes = chunks.size().first; + auto const num_columns = chunks.size().second; + bool is_mask_updated = false; + + for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { + if (chunks[0][col_idx].parent_validity_info.valid_map_base != nullptr) { + if (not is_mask_updated) { + chunks.device_to_host_sync(stream); + is_mask_updated = true; + } + + auto parent_valid_map_base = chunks[0][col_idx].parent_validity_info.valid_map_base; + auto child_valid_map_base = out_buffers[col_idx].null_mask(); + auto child_mask_len = + chunks[0][col_idx].column_num_rows - chunks[0][col_idx].parent_validity_info.null_count; + auto parent_mask_len = chunks[0][col_idx].column_num_rows; + + if (child_valid_map_base != nullptr) { + rmm::device_uvector dst_idx(child_mask_len, stream); + // Copy indexes at which the parent has valid value. 
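+        // As a concrete illustration (a hedged example, not taken from the patch): with
+        // four parent rows and parent validity 1 0 1 1, ORC writes the child column with
+        // only three entries, say with validity 1 0 1. `dst_idx` then holds {0, 2, 3},
+        // and scattering the set child bits onto those positions over an ALL_NULL mask
+        // yields the merged child validity 1 0 0 1.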
+ thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + parent_mask_len, + dst_idx.begin(), + [parent_valid_map_base] __device__(auto idx) { + return bit_is_set(parent_valid_map_base, idx); + }); + + auto merged_null_mask = cudf::detail::create_null_mask( + parent_mask_len, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); + auto merged_mask = static_cast(merged_null_mask.data()); + uint32_t* dst_idx_ptr = dst_idx.data(); + // Copy child valid bits from child column to valid indexes, this will merge both child + // and parent null masks + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + dst_idx.size(), + [child_valid_map_base, dst_idx_ptr, merged_mask] __device__(auto idx) { + if (bit_is_set(child_valid_map_base, idx)) { + cudf::set_bit(merged_mask, dst_idx_ptr[idx]); + }; + }); + + out_buffers[col_idx].set_null_mask(std::move(merged_null_mask)); + + } else { + // Since child column doesn't have a mask, copy parent null mask + auto mask_size = bitmask_allocation_size_bytes(parent_mask_len); + out_buffers[col_idx].set_null_mask( + rmm::device_buffer(static_cast(parent_valid_map_base), mask_size, stream, mr)); + } + } + } + + if (is_mask_updated) { + // Update chunks with pointers to column data which might have been changed. + for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + for (std::size_t col_idx = 0; col_idx < num_columns; ++col_idx) { + auto& chunk = chunks[stripe_idx][col_idx]; + chunk.valid_map_base = out_buffers[col_idx].null_mask(); + } + } + chunks.host_to_device_sync(stream); + } +} + +/** + * @brief Converts the stripe column data and outputs to columns. + * + * @param num_dicts Number of dictionary entries required + * @param skip_rows Number of rows to offset from start + * @param row_index_stride Distance between each row index + * @param level Current nesting level being processed + * @param tz_table Local time to UTC conversion table + * @param chunks Vector of list of column chunk descriptors + * @param row_groups Vector of list of row index descriptors + * @param out_buffers Output columns' device buffers + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + */ +void decode_stream_data(std::size_t num_dicts, + int64_t skip_rows, + size_type row_index_stride, + std::size_t level, + table_view const& tz_table, + cudf::detail::hostdevice_2dvector& chunks, + cudf::detail::device_2dspan row_groups, + std::vector& out_buffers, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_stripes = chunks.size().first; + auto const num_columns = chunks.size().second; + printf("decode %d stripess \n", (int)num_stripes); + + thrust::counting_iterator col_idx_it(0); + thrust::counting_iterator stripe_idx_it(0); + + // Update chunks with pointers to column data + std::for_each(stripe_idx_it, stripe_idx_it + num_stripes, [&](auto stripe_idx) { + std::for_each(col_idx_it, col_idx_it + num_columns, [&](auto col_idx) { + auto& chunk = chunks[stripe_idx][col_idx]; + chunk.column_data_base = out_buffers[col_idx].data(); + chunk.valid_map_base = out_buffers[col_idx].null_mask(); + }); + }); + + // Allocate global dictionary for deserializing + rmm::device_uvector global_dict(num_dicts, stream); + + chunks.host_to_device_sync(stream); + gpu::DecodeNullsAndStringDictionaries( + 
chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream); + + if (level > 0) { + printf("update_null_mask\n"); + // Update nullmasks for children if parent was a struct and had null mask + update_null_mask(chunks, out_buffers, stream, mr); + } + + auto const tz_table_dptr = table_device_view::create(tz_table, stream); + rmm::device_scalar error_count(0, stream); + // Update the null map for child columns + + // printf( + // "num col: %d, num stripe: %d, skip row: %d, row_groups size: %d, row index stride: %d, " + // "level: " + // "%d\n", + // (int)num_columns, + // (int)num_stripes, + // (int)skip_rows, + // (int)row_groups.size().first, + // (int)row_index_stride, + // (int)level + // ); + + gpu::DecodeOrcColumnData(chunks.base_device_ptr(), + global_dict.data(), + row_groups, + num_columns, + num_stripes, + skip_rows, + *tz_table_dptr, + row_groups.size().first, + row_index_stride, + level, + error_count.data(), + stream); + chunks.device_to_host_async(stream); + // `value` synchronizes + auto const num_errors = error_count.value(stream); + CUDF_EXPECTS(num_errors == 0, "ORC data decode failed"); + + std::for_each(col_idx_it + 0, col_idx_it + num_columns, [&](auto col_idx) { + out_buffers[col_idx].null_count() = + std::accumulate(stripe_idx_it + 0, + stripe_idx_it + num_stripes, + 0, + [&](auto null_count, auto const stripe_idx) { + // printf( + // "null count: %d => %d\n", (int)stripe_idx, + // (int)chunks[stripe_idx][col_idx].null_count); + // printf("num child rows: %d \n", + // (int)chunks[stripe_idx][col_idx].num_child_rows); + + return null_count + chunks[stripe_idx][col_idx].null_count; + }); + }); +} + +/** + * @brief Compute the per-stripe prefix sum of null count, for each struct column in the current + * layer. + */ +void scan_null_counts(cudf::detail::hostdevice_2dvector const& chunks, + cudf::host_span> prefix_sums, + rmm::cuda_stream_view stream) +{ + auto const num_stripes = chunks.size().first; + if (num_stripes == 0) return; + + auto const num_columns = chunks.size().second; + std::vector>> prefix_sums_to_update; + for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) { + // Null counts sums are only needed for children of struct columns + if (chunks[0][col_idx].type_kind == STRUCT) { + prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); + } + } + auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( + prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); + + thrust::for_each(rmm::exec_policy(stream), + d_prefix_sums_to_update.begin(), + d_prefix_sums_to_update.end(), + [chunks = cudf::detail::device_2dspan{chunks}] __device__( + auto const& idx_psums) { + auto const col_idx = idx_psums.first; + auto const psums = idx_psums.second; + + thrust::transform( + thrust::seq, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + psums.size(), + psums.begin(), + [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; }); + + thrust::inclusive_scan(thrust::seq, psums.begin(), psums.end(), psums.begin()); + }); + // `prefix_sums_to_update` goes out of scope, copy has to be done before we return + stream.synchronize(); +} + +// TODO: this is called for each chunk of stripes. +/** + * @brief Aggregate child metadata from parent column chunks. 
+ */ +void aggregate_child_meta(std::size_t stripe_start, + std::size_t level, + cudf::io::orc::detail::column_hierarchy const& selected_columns, + cudf::detail::host_2dspan chunks, + cudf::detail::host_2dspan row_groups, + host_span nested_cols, + host_span out_buffers, + reader_column_meta& col_meta) +{ + auto const num_of_stripes = chunks.size().first; + auto const num_of_rowgroups = row_groups.size().first; + auto const num_child_cols = selected_columns.levels[level + 1].size(); + auto const number_of_child_chunks = num_child_cols * num_of_stripes; + auto& num_child_rows = col_meta.num_child_rows; + auto& parent_column_data = col_meta.parent_column_data; + + // Reset the meta to store child column details. + num_child_rows.resize(selected_columns.levels[level + 1].size()); + std::fill(num_child_rows.begin(), num_child_rows.end(), 0); + parent_column_data.resize(number_of_child_chunks); + col_meta.parent_column_index.resize(number_of_child_chunks); + col_meta.child_start_row.resize(number_of_child_chunks); + col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks); + col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols); + + auto child_start_row = cudf::detail::host_2dspan( + col_meta.child_start_row.data(), num_of_stripes, num_child_cols); + auto num_child_rows_per_stripe = cudf::detail::host_2dspan( + col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols); + auto rwgrp_meta = cudf::detail::host_2dspan( + col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols); + + int index = 0; // number of child column processed + + printf("\n\n"); + // For each parent column, update its child column meta for each stripe. + std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) { + // printf("p_col.id: %d\n", (int)p_col.id); + + auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id]; + // printf(" level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx); + + int64_t start_row = 0; + auto processed_row_groups = 0; + + for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { + // Aggregate num_rows and start_row from processed parent columns per row groups + if (num_of_rowgroups) { + // printf(" num_of_rowgroups: %d\n", (int)num_of_rowgroups); + + auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; + auto processed_child_rows = 0; + + for (std::size_t rowgroup_id = 0; rowgroup_id < stripe_num_row_groups; + rowgroup_id++, processed_row_groups++) { + auto const child_rows = row_groups[processed_row_groups][parent_col_idx].num_child_rows; + for (size_type id = 0; id < p_col.num_children; id++) { + auto const child_col_idx = index + id; + rwgrp_meta[processed_row_groups][child_col_idx].start_row = processed_child_rows; + rwgrp_meta[processed_row_groups][child_col_idx].num_rows = child_rows; + } + processed_child_rows += child_rows; + } + } + + // Aggregate start row, number of rows per chunk and total number of rows in a column + auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; + // printf(" stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows); + // printf(" p_col.num_children: %d\n", (int)p_col.num_children); + + for (size_type id = 0; id < p_col.num_children; id++) { + auto const child_col_idx = index + id; + + // TODO: Check for overflow here. 
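+        // (An editorial sketch for the TODO above, assuming these counters are int64_t:
+        //  one possible guard is to check, before the accumulation below, that
+        //    child_rows <= std::numeric_limits<int64_t>::max() - num_child_rows[child_col_idx]
+        //  and raise an error via CUDF_EXPECTS otherwise.)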
+        num_child_rows[child_col_idx] += child_rows;
+        num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows;
+        // start row could be different for each column when there is nesting at each stripe level
+        child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 0 : start_row;
+        // printf("update child_start_row (%d, %d): %d\n",
+        //        (int)stripe_id,
+        //        (int)child_col_idx,
+        //        (int)start_row);
+      }
+      start_row += child_rows;
+      // printf("    start_row: %d\n", (int)start_row);
+    }
+
+    // Parent column null mask and null count would be required for child column
+    // to adjust its nullmask.
+    auto type              = out_buffers[parent_col_idx].type.id();
+    auto parent_null_count = static_cast<size_type>(out_buffers[parent_col_idx].null_count());
+    auto parent_valid_map  = out_buffers[parent_col_idx].null_mask();
+    auto num_rows          = out_buffers[parent_col_idx].size;
+
+    for (size_type id = 0; id < p_col.num_children; id++) {
+      auto const child_col_idx                    = index + id;
+      col_meta.parent_column_index[child_col_idx] = parent_col_idx;
+      if (type == type_id::STRUCT) {
+        parent_column_data[child_col_idx] = {parent_valid_map, parent_null_count};
+        // Number of rows in child will remain same as parent in case of struct column
+        num_child_rows[child_col_idx] = num_rows;
+      } else {
+        parent_column_data[child_col_idx] = {nullptr, 0};
+      }
+    }
+    index += p_col.num_children;
+  });
+}
+
+/**
+ * @brief Struct to store buffer data and size of list buffer.
+ */
+struct list_buffer_data {
+  size_type* data;
+  size_type size;
+};
+
+// Generates offsets for list buffer from number of elements in a row.
+void generate_offsets_for_list(host_span<list_buffer_data> buff_data, rmm::cuda_stream_view stream)
+{
+  for (auto& list_data : buff_data) {
+    thrust::exclusive_scan(rmm::exec_policy_nosync(stream),
+                           list_data.data,
+                           list_data.data + list_data.size,
+                           list_data.data);
+  }
+}
+
+/**
+ * @brief Find splits of the input table such that each split range of rows has a cumulative
+ * data size within the given size limit.
+ *
+ * @param input The input table to find splits for
+ * @param segment_length Number of rows in each segment used for size computation
+ * @param size_limit The maximum size (in bytes) of each split
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return A vector of chunks, each storing the start index and row count of one split
+ */
+std::vector<chunk> find_table_splits(table_view const& input,
+                                     size_type segment_length,
+                                     std::size_t size_limit,
+                                     rmm::cuda_stream_view stream)
+{
+  printf("find table split, seg length = %d, limit = %d\n", segment_length, (int)size_limit);
+
+  // If segment_length is zero: we don't have any limit on granularity.
+  // As such, set segment length to the number of rows.
+  if (segment_length == 0) { segment_length = input.num_rows(); }
+
+  // If we have a small number of rows, adjust segment_length before calling
+  // `segmented_row_bit_count`.
+  segment_length = std::min(segment_length, input.num_rows());
+
+  // Default 10k rows.
+  auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count(
+    input, segment_length, stream, rmm::mr::get_current_device_resource());
+
+  auto segmented_sizes =
+    cudf::detail::hostdevice_vector<cumulative_size>(d_segmented_sizes->size(), stream);
+
+  // TODO: exec_policy_nosync
+  thrust::transform(
+    rmm::exec_policy(stream),
+    thrust::make_counting_iterator(0),
+    thrust::make_counting_iterator(d_segmented_sizes->size()),
+    segmented_sizes.d_begin(),
+    [segment_length,
+     num_rows = input.num_rows(),
+     d_sizes  = d_segmented_sizes->view().begin<size_type>()] __device__(auto const segment_idx) {
+      // Since the number of rows may not be divisible by segment_length,
+      // the last segment may be shorter than the others.
+      auto const current_length =
+        cuda::std::min(segment_length, num_rows - segment_length * segment_idx);
+      auto const size = d_sizes[segment_idx];
+      return cumulative_size{current_length, static_cast<std::size_t>(size)};
+    });
+
+  // TODO: remove:
+  segmented_sizes.device_to_host_sync(stream);
+  printf("total row sizes by segment = %d:\n", (int)segment_length);
+  for (auto& size : segmented_sizes) {
+    printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT);
+  }
+
+  // TODO: exec_policy_nosync
+  thrust::inclusive_scan(rmm::exec_policy(stream),
+                         segmented_sizes.d_begin(),
+                         segmented_sizes.d_end(),
+                         segmented_sizes.d_begin(),
+                         cumulative_size_sum{});
+  segmented_sizes.device_to_host_sync(stream);
+
+  // Since the segment sizes are in bits, we need to multiply CHAR_BIT with the output limit.
+  return find_splits(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT);
+}
+
+}  // namespace
+
+// TODO: this should be called per chunk of stripes.
+void reader::impl::decompress_and_decode()
+{
+  if (_file_itm_data.has_no_data()) { return; }
+
+  auto const stripe_chunk =
+    _chunk_read_data.decode_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++];
+  auto const stripe_start = stripe_chunk.start_idx;
+  auto const stripe_end   = stripe_chunk.start_idx + stripe_chunk.count;
+
+  auto const load_stripe_start =
+    _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1].start_idx;
+
+  printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end);
+
+  auto const rows_to_skip      = _file_itm_data.rows_to_skip;
+  // auto const rows_to_read      = _file_itm_data.rows_to_read;
+  auto const& selected_stripes = _file_itm_data.selected_stripes;
+
+  // auto const rows_to_skip = 0;
+  int64_t rows_to_read = 0;
+  for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) {
+    auto const& stripe     = selected_stripes[stripe_idx];
+    auto const stripe_info = stripe.stripe_info;
+    // TODO: check overflow
+    // CUDF_EXPECTS(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows <
+    //   static_cast<uint64_t>(std::numeric_limits<size_type>::max()),
+    //   "The number of rows in the stripe exceeds the size_type limit");
+    rows_to_read += static_cast<int64_t>(stripe_info->numberOfRows);
+
+    if (_file_itm_data.rows_to_skip > 0) {
+      CUDF_EXPECTS(_file_itm_data.rows_to_skip < static_cast<int64_t>(stripe_info->numberOfRows),
+                   "Number of rows to skip must be smaller than the stripe's row count");
+    }
+  }
+  rows_to_read                = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read);
+  _file_itm_data.rows_to_skip = 0;
+
+  // Set up table for converting timestamp columns from local to UTC time
+  auto const tz_table = [&, &selected_stripes = selected_stripes] {
+    auto const has_timestamp_column = std::any_of(
+      _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) {
+        return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) {
+          return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP;
+        });
+      });
+
+    return has_timestamp_column ?
cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) + : std::make_unique(); + }(); + + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; + auto& lvl_chunks = _file_itm_data.lvl_data_chunks; + + null_count_prefix_sums.clear(); + + // TODO: move this to global step + lvl_chunks.resize(_selected_columns.num_levels()); + _out_buffers.clear(); + _out_buffers.resize(_selected_columns.num_levels()); + + // + // + // + // TODO: move this to reader_impl.cu, decomp and decode step + // std::size_t num_stripes = selected_stripes.size(); + std::size_t num_stripes = stripe_chunk.count; + + // Iterates through levels of nested columns, child column will be one level down + // compared to parent column. + auto& col_meta = *_col_meta; + +#if 0 + printf("num_child_rows: (size %d)\n", (int)_col_meta->num_child_rows.size()); + if (_col_meta->num_child_rows.size()) { + for (auto x : _col_meta->num_child_rows) { + printf("%d, ", (int)x); + } + printf("\n"); + + _col_meta->num_child_rows.clear(); + } + + printf("parent_column_data null count: (size %d)\n", (int)_col_meta->parent_column_data.size()); + if (_col_meta->parent_column_data.size()) { + for (auto x : _col_meta->parent_column_data) { + printf("%d, ", (int)x.null_count); + } + printf("\n"); + _col_meta->parent_column_data.clear(); + } + + printf("parent_column_index: (size %d)\n", (int)_col_meta->parent_column_index.size()); + if (_col_meta->parent_column_index.size()) { + for (auto x : _col_meta->parent_column_index) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->parent_column_index.clear(); + } + + printf("child_start_row: (size %d)\n", (int)_col_meta->child_start_row.size()); + if (_col_meta->child_start_row.size()) { + for (auto x : _col_meta->child_start_row) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->child_start_row.clear(); + } + + printf("num_child_rows_per_stripe: (size %d)\n", + (int)_col_meta->num_child_rows_per_stripe.size()); + if (_col_meta->num_child_rows_per_stripe.size()) { + for (auto x : _col_meta->num_child_rows_per_stripe) { + printf("%d, ", (int)x); + } + printf("\n"); + _col_meta->num_child_rows_per_stripe.clear(); + } + + printf("rwgrp_meta: (size %d)\n", (int)_col_meta->rwgrp_meta.size()); + if (_col_meta->rwgrp_meta.size()) { + for (auto x : _col_meta->rwgrp_meta) { + printf("(%d | %d), ", (int)x.start_row, (int)x.num_rows); + } + printf("\n"); + } + +#endif + + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + printf("processing level = %d\n", (int)level); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); + + auto& columns_level = _selected_columns.levels[level]; + + // TODO: do it in global step + // Association between each ORC column and its cudf::column + std::vector nested_cols; + + // Get a list of column data types + std::vector column_types; + for (auto& col : columns_level) { + auto col_type = + to_cudf_type(_metadata.get_col_type(col.id).kind, + _config.use_np_dtypes, + 
_config.timestamp_type.id(), + to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); + CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); + if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or + col_type == type_id::DECIMAL128) { + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto const scale = + -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); + column_types.emplace_back(col_type, scale); + } else { + column_types.emplace_back(col_type); + } + + // Map each ORC column to its column + if (col_type == type_id::LIST or col_type == type_id::STRUCT) { + nested_cols.emplace_back(col); + } + } + + auto const num_columns = columns_level.size(); + auto& chunks = lvl_chunks[level]; + chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + const bool use_index = + _config.use_index && + // Do stripes have row group index + _metadata.is_row_grp_idx_present() && + // Only use if we don't have much work with complete columns & stripes + // TODO: Consider nrows, gpu, and tune the threshold + (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && + _metadata.get_row_index_stride() != 0 && num_columns * num_stripes < 8 * 128) && + // Only use if first row is aligned to a stripe boundary + // TODO: Fix logic to handle unaligned rows + (rows_to_skip == 0); + + printf(" use_index: %d\n", (int)use_index); + + // Logically view streams as columns + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; + + null_count_prefix_sums.emplace_back(); + null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); + std::generate_n(std::back_inserter(null_count_prefix_sums.back()), + _selected_columns.levels[level].size(), + [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + num_stripes, _stream, rmm::mr::get_current_device_resource()); + }); + + // Tracker for eventually deallocating compressed and uncompressed data + auto& stripe_data = lvl_stripe_data[level]; + + int64_t stripe_start_row = 0; + int64_t num_dict_entries = 0; + int64_t num_rowgroups = 0; + + // TODO: Stripe and stream idx must be by chunk. 
+ // std::size_t stripe_idx = 0; + std::size_t stream_idx = 0; + + for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { + // for (auto const& stripe : selected_stripes) { + + printf("processing stripe_idx = %d\n", (int)stripe_idx); + auto const& stripe = selected_stripes[stripe_idx]; + auto const stripe_info = stripe.stripe_info; + auto const stripe_footer = stripe.stripe_footer; + + // printf("stripeinfo->indexLength: %d, data: %d\n", + // (int)stripe_info->indexLength, + // (int)stripe_info->dataLength); + + auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, + level, + stripe_info, + stripe_footer, + col_meta.orc_col_map[level], + _metadata.get_types(), + use_index, + level == 0, + &num_dict_entries, + &stream_idx, + std::nullopt, // stream_info + &chunks); + + auto const is_stripe_data_empty = total_data_size == 0; + printf("is_stripe_data_empty: %d\n", (int)is_stripe_data_empty); + + CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, + "Invalid index rowgroup stream data"); + + // TODO: Wrong? + // stripe load_stripe_start? + auto dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data()); + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows); + printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); + + auto const rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; + if (use_index) { + stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + _metadata.get_row_index_stride(); + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + // Update chunks to reference streams pointers + for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { + auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; + // start row, number of rows in a each stripe and total number of rows + // may change in lower levels of nesting + chunk.start_row = + (level == 0) + ? stripe_start_row + : col_meta.child_start_row[(stripe_idx - stripe_start) * num_columns + col_idx]; + chunk.num_rows = + (level == 0) + ? static_cast(stripe_info->numberOfRows) + : col_meta + .num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_columns + col_idx]; + printf("col idx: %d, start_row: %d, num rows: %d\n", + (int)col_idx, + (int)chunk.start_row, + (int)chunk.num_rows); + + chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; + chunk.parent_validity_info = + (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; + chunk.parent_null_count_prefix_sums = + (level == 0) + ? nullptr + : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data(); + chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind; + chunk.type_kind = + _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; + + printf("type: %d\n", (int)chunk.type_kind); + + // num_child_rows for a struct column will be same, for other nested types it will be + // calculated. + chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; + chunk.dtype_id = column_types[col_idx].id(); + chunk.decimal_scale = _metadata.per_file_metadata[stripe.source_idx] + .ff.types[columns_level[col_idx].id] + .scale.value_or(0); + + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? 
sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) + ? sizeof(size_type) + : cudf::size_of(column_types[col_idx]); + chunk.num_rowgroups = stripe_num_rowgroups; + // printf("stripe_num_rowgroups: %d\n", (int)stripe_num_rowgroups); + + if (chunk.type_kind == orc::TIMESTAMP) { + chunk.timestamp_type_id = _config.timestamp_type.id(); + } + if (not is_stripe_data_empty) { + for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stream_begin].dst_pos; + // printf("chunk.streams[%d] of chunk.strm_id[%d], stripe %d | %d, collect from %d\n", + // (int)k, + // (int)chunk.strm_id[k], + // (int)stripe_idx, + // (int)stripe_start, + // (int)(chunk.strm_id[k] + stream_begin)); + } + } + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + stripe_start_row += num_rows_per_stripe; + num_rowgroups += stripe_num_rowgroups; + + // stripe_idx++; + } // for (stripe : selected_stripes) + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + if (stripe_data.empty()) { continue; } + + // Process dataset chunk pages into output columns + auto row_groups = + cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); + if (level > 0 and row_groups.size().first) { + cudf::host_span row_groups_span(row_groups.base_host_ptr(), + num_rowgroups * num_columns); + auto& rw_grp_meta = col_meta.rwgrp_meta; + + // Update start row and num rows per row group + std::transform(rw_grp_meta.begin(), + rw_grp_meta.end(), + row_groups_span.begin(), + rw_grp_meta.begin(), + [&](auto meta, auto& row_grp) { + row_grp.num_rows = meta.num_rows; + row_grp.start_row = meta.start_row; + return meta; + }); + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + // Setup row group descriptors if using indexes + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + // printf("decompress----------------------\n"); + // printf("line %d\n", __LINE__); + // fflush(stdout); + CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + auto decomp_data = decompress_stripe_data( + _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], + stripe_chunk, + _file_itm_data.compinfo_map, + *_metadata.per_file_metadata[0].decompressor, + stripe_data, + stream_info, + chunks, + row_groups, + num_stripes, + _metadata.get_row_index_stride(), + level == 0, + _stream); + // stripe_data.clear(); + // stripe_data.push_back(std::move(decomp_data)); + + // TODO: only reset each one if the new size/type are different. 
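The hunk below frees the compressed inputs eagerly once the decompressed buffer exists: moving `decomp_data` into the first slot releases whatever that slot previously held, and assigning an empty `rmm::device_buffer` clears the remaining slots. A minimal sketch of the pattern, with the function name and parameters being illustrative rather than part of this patch:

#include <rmm/device_buffer.hpp>

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Replace the first slot with the freshly decompressed buffer and drop the
// now-unneeded compressed inputs held by the remaining slots.
void release_compressed_slots(std::vector<rmm::device_buffer>& stripe_data,
                              rmm::device_buffer&& decomp_data,
                              std::size_t first_slot,
                              std::int64_t num_slots)
{
  // Move assignment frees the old allocation in `first_slot`.
  stripe_data[first_slot] = std::move(decomp_data);
  // Assigning an empty buffer frees each remaining compressed input.
  for (std::int64_t i = 1; i < num_slots; ++i) {
    stripe_data[first_slot + i] = rmm::device_buffer{};
  }
}

This keeps at most one copy of the chunk's data resident at a time, which is the point of the peak-memory logging sprinkled through this function.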
+    stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data);
+    for (int64_t i = 1; i < stripe_chunk.count; ++i) {
+      stripe_data[i + stripe_start - load_stripe_start] = {};
+    }
+
+    {
+      _stream.synchronize();
+      auto peak_mem = mem_stats_logger.peak_memory_usage();
+      std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "("
+                << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+    }
+
+    // printf("line %d\n", __LINE__);
+    // fflush(stdout);
+
+  } else {
+    // printf("no decompression----------------------\n");
+
+    if (row_groups.size().first) {
+      // printf("line %d\n", __LINE__);
+      // fflush(stdout);
+      chunks.host_to_device_async(_stream);
+      row_groups.host_to_device_async(_stream);
+      gpu::ParseRowGroupIndex(row_groups.base_device_ptr(),
+                              nullptr,
+                              chunks.base_device_ptr(),
+                              num_columns,
+                              num_stripes,
+                              _metadata.get_row_index_stride(),
+                              level == 0,
+                              _stream);
+    }
+  }
+
+  // printf("line %d\n", __LINE__);
+  // fflush(stdout);
+
+  {
+    _stream.synchronize();
+    auto peak_mem = mem_stats_logger.peak_memory_usage();
+    std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "("
+              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+  }
+
+  // TODO: do not clear but reset each one.
+  // and only reset if the new size/type are different.
+  _out_buffers[level].clear();
+
+  {
+    _stream.synchronize();
+    auto peak_mem = mem_stats_logger.peak_memory_usage();
+    std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "("
+              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+  }
+
+  for (std::size_t i = 0; i < column_types.size(); ++i) {
+    bool is_nullable = false;
+    for (std::size_t j = 0; j < num_stripes; ++j) {
+      if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) {
+        printf("   is nullable\n");
+        is_nullable = true;
+        break;
+      }
+    }
+    auto is_list_type = (column_types[i].id() == type_id::LIST);
+    auto n_rows       = (level == 0) ?
rows_to_read : col_meta.num_child_rows[i]; + + // printf(" create col, num rows: %d\n", (int)n_rows); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + // For list column, offset column will be always size + 1 + if (is_list_type) n_rows++; + _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", buffer size: " << n_rows + << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + decode_stream_data(num_dict_entries, + rows_to_skip, + _metadata.get_row_index_stride(), + level, + tz_table->view(), + chunks, + row_groups, + _out_buffers[level], + _stream, + _mr); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + + if (nested_cols.size()) { + printf("have nested col\n"); + + // Extract information to process nested child columns + scan_null_counts(chunks, null_count_prefix_sums[level], _stream); + + row_groups.device_to_host_sync(_stream); + aggregate_child_meta(stripe_start, + level, + _selected_columns, + chunks, + row_groups, + nested_cols, + _out_buffers[level], + col_meta); + + // ORC stores number of elements at each row, so we need to generate offsets from that + std::vector buff_data; + std::for_each( + _out_buffers[level].begin(), _out_buffers[level].end(), [&buff_data](auto& out_buffer) { + if (out_buffer.type.id() == type_id::LIST) { + auto data = static_cast(out_buffer.data()); + buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); + } + }); + + if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); } + } + + // printf("line %d\n", __LINE__); + // fflush(stdout); + } // end loop level + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + std::vector> out_columns; + _out_metadata = get_meta_with_user_data(); + std::transform( + _selected_columns.levels[0].begin(), + _selected_columns.levels[0].end(), + std::back_inserter(out_columns), + [&](auto const& orc_col_meta) { + _out_metadata.schema_info.emplace_back(""); + auto col_buffer = assemble_buffer( + orc_col_meta.id, 0, *_col_meta, _metadata, _selected_columns, _out_buffers, _stream, _mr); + return make_column(col_buffer, &_out_metadata.schema_info.back(), std::nullopt, _stream); + }); + _chunk_read_data.decoded_table = std::make_unique
(std::move(out_columns)); + + // TODO: do not clear but reset each one. + // and only reset if the new size/type are different. + // This clear is just to check if there is memory leak. + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + _out_buffers[level].clear(); + + auto& stripe_data = lvl_stripe_data[level]; + + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + stripe_data[stripe_start - load_stripe_start] = {}; + } else { + for (int64_t i = 0; i < stripe_chunk.count; ++i) { + stripe_data[i + stripe_start - load_stripe_start] = {}; + } + } + } + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } + + // printf("col: \n"); + // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view()); + + // DEBUG only + // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; + + _chunk_read_data.curr_output_table_chunk = 0; + _chunk_read_data.output_table_chunks = + _chunk_read_data.output_size_limit == 0 + ? std::vector{chunk{0, _chunk_read_data.decoded_table->num_rows()}} + : find_table_splits(_chunk_read_data.decoded_table->view(), + _chunk_read_data.output_row_granularity, + _chunk_read_data.output_size_limit, + _stream); + + auto& splits = _chunk_read_data.output_table_chunks; + printf("------------\nSplits decoded table (/total num rows = %d): \n", + (int)_chunk_read_data.decoded_table->num_rows()); + for (size_t idx = 0; idx < splits.size(); idx++) { + printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + } + fflush(stdout); + + { + _stream.synchronize(); + auto peak_mem = mem_stats_logger.peak_memory_usage(); + std::cout << "decomp and decode, peak_memory_usage: " << peak_mem << "(" + << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + } +} + +} // namespace cudf::io::orc::detail From c44f0ec03c6338664e77388a4ba1f908e41669de Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 5 Mar 2024 16:53:07 -0800 Subject: [PATCH 169/321] Change comment and docs Signed-off-by: Nghia Truong --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/include/cudf/io/detail/orc.hpp | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5e7b13331a1..516338febca 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -255,6 +255,7 @@ ConfigureNVBench( # ################################################################################################## # * orc reader benchmark -------------------------------------------------------------------------- +# TODO: add back the removed file, and add new file ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp) # ################################################################################################## diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index c6176021a79..83035b32e04 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -79,10 +79,10 @@ class reader { }; /** - * @brief The reader class that supports iterative reading of a given file. + * @brief The reader class that supports iterative reading from an array of data sources. * * This class intentionally subclasses the `reader` class with private inheritance to hide the - * `reader::read()` API. 
As such, only chunked reading APIs are supported. + * base class `reader::read()` API. As such, only chunked reading APIs are supported through it. */ class chunked_reader : private reader { public: @@ -98,18 +98,25 @@ class chunked_reader : private reader { * * ``` * - * If `output_size_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the - * whole file and return a table containing all rows. + * If `output_size_limit == 0` (i.e., no output limit) and `data_read_limit == 0` (no temporary + * memory size limit), a call to `read_chunk()` will read the whole data source and return a table + * containing all rows. * - * TODO: data read limit - * TODO: granularity + * The `output_size_limit` parameter controls the size of the output table to be returned per + * `read_chunk()` call. If the user specifies a 100 MB limit, the reader will attempt to return + * tables that have a total bytes size (over all columns) of 100 MB or less. + * This is a soft limit and the code will not fail if it cannot satisfy the limit. * - * @param output_size_limit Limit on total number of bytes to be returned per read, + * The `data_read_limit` parameter controls how much temporary memory is used in the entire + * process of loading, decompressing and decoding of data. Again, this is also a soft limit and + * the reader will try to make the best effort. + * + * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * or `0` if there is no limit + * @param data_read_limit Limit on temporary memory usage for reading the data sources, * or `0` if there is no limit - * @param data_read_limit Limit on memory usage for the purposes of decompression and processing - * of input, or `0` if there is no limit * @param sources Input `datasource` objects to read the dataset from - * @param options Settings for controlling reading behavior + * @param options Settings for controlling reading behaviors * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ From b842118ad940521bf5f6c9ef1803c6e15faec212 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 14:50:33 -0800 Subject: [PATCH 170/321] Add error check for `output_row_granularity` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 4d285e6788d..79bcaae25f1 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -57,6 +57,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, rmm::mr::device_memory_resource* mr) : reader() // TODO { + CUDF_EXPECTS(output_row_granularity > 0, "Invalid value of `output_row_granularity`."); _impl = std::make_unique(output_size_limit, data_read_limit, output_row_granularity, From 248f0ef382add0323b166137d88650fb9fca4bde Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 15:12:14 -0800 Subject: [PATCH 171/321] Update docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 36 +++++++++++++----------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 83035b32e04..32fcafc1923 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -111,10 +111,18 @@ class chunked_reader : private reader { * process of loading, decompressing and decoding of data. 
Again, this is also a soft limit and * the reader will try to make the best effort. * + * Finally, the parameter `output_row_granularity` controls the changes in row number of the + * output chunk. For each call to `read_chunk()`, with respect to the given `data_read_limit`, a + * subset of stripes may be loaded, decompressed and decoded into an intermediate table. The + * reader will then subdivide that table into smaller tables for final output using + * `output_row_granularity` as the subdivision step. + * * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, * or `0` if there is no limit * @param data_read_limit Limit on temporary memory usage for reading the data sources, * or `0` if there is no limit + * @param output_row_granularity The granularity parameter used for subdividing the decoded + * table for final output * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behaviors * @param stream CUDA stream used for device memory operations and kernel launches @@ -122,42 +130,28 @@ class chunked_reader : private reader { */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, + size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); - /** * @brief Constructor from size limits and an array of data sources with reader options. * - * The typical usage should be similar to this: - * ``` - * do { - * auto const chunk = reader.read_chunk(); - * // Process chunk - * } while (reader.has_next()); + * This constructor implicitly call the other constructor with `output_row_granularity` set to + * 10'000 rows. * - * ``` - * - * If `output_size_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the - * whole file and return a table containing all rows. 
- * - * TODO: data read limit - * TODO: granularity - * - * @param output_size_limit Limit on total number of bytes to be returned per read, + * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * or `0` if there is no limit + * @param data_read_limit Limit on temporary memory usage for reading the data sources, * or `0` if there is no limit - * @param data_read_limit Limit on memory usage for the purposes of decompression and processing - * of input, or `0` if there is no limit - * @param output_row_granularity TODO * @param sources Input `datasource` objects to read the dataset from - * @param options Settings for controlling reading behavior + * @param options Settings for controlling reading behaviors * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, - size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, From 497eea5f5bab5e1047e5aafe5c7c91c041296ff9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 15:33:54 -0800 Subject: [PATCH 172/321] Update docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 59 ++-------------- cpp/include/cudf/io/orc.hpp | 110 +++++++++++++++++------------ cpp/src/io/functions.cpp | 34 ++++++--- 3 files changed, 95 insertions(+), 108 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 32fcafc1923..d532cee5677 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -87,46 +87,9 @@ class reader { class chunked_reader : private reader { public: /** - * @brief Constructor from size limits and an array of data sources with reader options. - * - * The typical usage should be similar to this: - * ``` - * do { - * auto const chunk = reader.read_chunk(); - * // Process chunk - * } while (reader.has_next()); - * - * ``` - * - * If `output_size_limit == 0` (i.e., no output limit) and `data_read_limit == 0` (no temporary - * memory size limit), a call to `read_chunk()` will read the whole data source and return a table - * containing all rows. - * - * The `output_size_limit` parameter controls the size of the output table to be returned per - * `read_chunk()` call. If the user specifies a 100 MB limit, the reader will attempt to return - * tables that have a total bytes size (over all columns) of 100 MB or less. - * This is a soft limit and the code will not fail if it cannot satisfy the limit. - * - * The `data_read_limit` parameter controls how much temporary memory is used in the entire - * process of loading, decompressing and decoding of data. Again, this is also a soft limit and - * the reader will try to make the best effort. - * - * Finally, the parameter `output_row_granularity` controls the changes in row number of the - * output chunk. For each call to `read_chunk()`, with respect to the given `data_read_limit`, a - * subset of stripes may be loaded, decompressed and decoded into an intermediate table. The - * reader will then subdivide that table into smaller tables for final output using - * `output_row_granularity` as the subdivision step. 
- * - * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, - * or `0` if there is no limit - * @param data_read_limit Limit on temporary memory usage for reading the data sources, - * or `0` if there is no limit - * @param output_row_granularity The granularity parameter used for subdividing the decoded - * table for final output - * @param sources Input `datasource` objects to read the dataset from - * @param options Settings for controlling reading behaviors - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type, + * std::vector>&&, orc_reader_options const&, + * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, @@ -136,19 +99,9 @@ class chunked_reader : private reader { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief Constructor from size limits and an array of data sources with reader options. - * - * This constructor implicitly call the other constructor with `output_row_granularity` set to - * 10'000 rows. - * - * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, - * or `0` if there is no limit - * @param data_read_limit Limit on temporary memory usage for reading the data sources, - * or `0` if there is no limit - * @param sources Input `datasource` objects to read the dataset from - * @param options Settings for controlling reading behaviors - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, + * std::vector>&&, orc_reader_options const&, + * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 19252e77b91..129541be156 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -424,73 +424,95 @@ class chunked_orc_reader { chunked_orc_reader() = default; /** - * @brief Constructor for chunked reader. + * @brief Constructor from size limits and an array of data sources with reader options. * - * This constructor requires the same `orc_reader_option` parameter as in - * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the - * output table for each reading. + * The typical usage should be similar to this: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); * - * TODO: data read limit + * ``` * - * @param output_size_limit Limit on total number of bytes to be returned per read, + * If `output_size_limit == 0` (i.e., no output limit) and `data_read_limit == 0` (no temporary + * memory size limit), a call to `read_chunk()` will read the whole data source and return a table + * containing all rows. + * + * The `output_size_limit` parameter controls the size of the output table to be returned per + * `read_chunk()` call. If the user specifies a 100 MB limit, the reader will attempt to return + * tables that have a total bytes size (over all columns) of 100 MB or less. 
+ * This is a soft limit and the code will not fail if it cannot satisfy the limit.
+ *
+ * The `data_read_limit` parameter controls how much temporary memory is used in the entire
+ * process of loading, decompressing and decoding of data. Again, this is also a soft limit and
+ * the reader will try to make the best effort.
+ *
+ * Finally, the parameter `output_row_granularity` controls the changes in row number of the
+ * output chunk. For each call to `read_chunk()`, with respect to the given `data_read_limit`, a
+ * subset of stripes may be loaded, decompressed and decoded into an intermediate table. The
+ * reader will then subdivide that table into smaller tables for final output using
+ * `output_row_granularity` as the subdivision step.
+ *
+ * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
+ *        or `0` if there is no limit
+ * @param data_read_limit Limit on temporary memory usage for reading the data sources,
+ *        or `0` if there is no limit
+ * @param output_row_granularity The granularity parameter used for subdividing the decoded
+ *        table for final output
+ * @param options Settings for controlling reading behaviors
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource to use for device memory allocation
+ */
-  chunked_orc_reader(std::size_t output_size_limit,
-                     orc_reader_options const& options,
-                     rmm::cuda_stream_view stream = cudf::get_default_stream(),
-                     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  explicit chunked_orc_reader(
+    std::size_t output_size_limit,
+    std::size_t data_read_limit,
+    size_type output_row_granularity,
+    orc_reader_options const& options,
+    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

   /**
-   * @brief Constructor for chunked reader.
+   * @brief Constructor from size limits and an array of data sources with reader options.
    *
-   * This constructor requires the same `orc_reader_option` parameter as in
-   * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the
-   * output table for each reading.
+   * This constructor implicitly calls the other constructor with `output_row_granularity` set to
+   * 10'000 rows.
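+   *
+   * A rough usage sketch (the limit values below are illustrative examples only,
+   * not recommendations):
+   * ```
+   * auto reader = cudf::io::chunked_orc_reader(
+   *   600 * 1024 * 1024, 512 * 1024 * 1024, options);  // 600 MB output, 512 MB temporary
+   * do {
+   *   auto chunk = reader.read_chunk();
+   *   // Process chunk...
+   * } while (reader.has_next());
+   * ```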
   *
-   * TODO: data read limit
-   *
-   * @param output_size_limit Limit on total number of bytes to be returned per read,
+   * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
+   *        or `0` if there is no limit
+   * @param data_read_limit Limit on temporary memory usage for reading the data sources,
    *        or `0` if there is no limit
-   * @param data_read_limit Limit on memory usage for the purposes of decompression and processing
-   *        of input, or `0` if there is no limit
-   * @param options The options used to read Parquet file
+   * @param options Settings for controlling reading behaviors
    * @param stream CUDA stream used for device memory operations and kernel launches
    * @param mr Device memory resource to use for device memory allocation
    */
-  chunked_orc_reader(std::size_t output_size_limit,
-                     std::size_t data_read_limit,
-                     orc_reader_options const& options,
-                     rmm::cuda_stream_view stream = cudf::get_default_stream(),
-                     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  explicit chunked_orc_reader(
+    std::size_t output_size_limit,
+    std::size_t data_read_limit,
+    orc_reader_options const& options,
+    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

   /**
-   * @brief Constructor for chunked reader.
+   * @brief Constructor from output size limit and an array of data sources with reader options.
    *
-   * This constructor requires the same `orc_reader_option` parameter as in
-   * `cudf::read_orc()`, and additional parameters to specify the size byte limits of the
-   * output table for each reading.
+   * This constructor implicitly calls the other constructor with `data_read_limit` set to `0` and
+   * `output_row_granularity` set to 10'000 rows.
    *
-   * TODO: data read limit
-   *
-   * @param output_size_limit Limit on total number of bytes to be returned per read,
+   * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
    *        or `0` if there is no limit
-   * @param data_read_limit Limit on memory usage for the purposes of decompression and processing
-   *        of input, or `0` if there is no limit
-   * @param output_row_granularity TODO
-   * @param options The options used to read Parquet file
+   * @param options Settings for controlling reading behaviors
    * @param stream CUDA stream used for device memory operations and kernel launches
    * @param mr Device memory resource to use for device memory allocation
    */
-  chunked_orc_reader(std::size_t output_size_limit,
-                     std::size_t data_read_limit,
-                     size_type output_row_granularity,
-                     orc_reader_options const& options,
-                     rmm::cuda_stream_view stream = cudf::get_default_stream(),
-                     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
+  explicit chunked_orc_reader(
+    std::size_t output_size_limit,
+    orc_reader_options const& options,
+    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
   /**
    * @brief Destructor, destroying the internal reader instance.
* diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 04799fabeef..378a37ce859 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -449,23 +449,19 @@ void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream) } /** - * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type, + * std::vector>&&, orc_reader_options const&, + * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) */ -chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr) -{ -} - chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, std::size_t data_read_limit, + size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader{std::make_unique(output_size_limit, data_read_limit, + output_row_granularity, make_datasources(options.get_source()), options, stream, @@ -473,15 +469,18 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, { } +/** + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, + * std::vector>&&, orc_reader_options const&, + * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + */ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, std::size_t data_read_limit, - size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader{std::make_unique(output_size_limit, data_read_limit, - output_row_granularity, make_datasources(options.get_source()), options, stream, @@ -489,6 +488,19 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, { } +/** + * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, + * std::vector>&&, orc_reader_options const&, + * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + */ +chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr) +{ +} + /** * @copydoc cudf::io::chunked_orc_reader::~chunked_orc_reader */ From 33aff9412329c08a17ed06e842fff32be78d3f67 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 16:41:14 -0800 Subject: [PATCH 173/321] Cleanup and change docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 10 +++----- cpp/src/io/functions.cpp | 28 ++++------------------- cpp/src/io/orc/aggregate_orc_metadata.cpp | 12 ++++------ cpp/src/io/orc/reader.cu | 4 ++-- cpp/src/io/utilities/row_selection.cpp | 14 ++++++------ 5 files changed, 20 insertions(+), 48 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 129541be156..99580bd9886 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -406,7 +406,7 @@ table_with_metadata read_orc( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief The chunked orc reader class to read ORC file iteratively in to a series of + * @brief The chunked orc reader class to read ORC file iteratively into a series of * tables, chunk by chunk. 
 *
 * This class is designed to address the reading issue when reading very large ORC files such
@@ -515,10 +515,6 @@ class chunked_orc_reader {
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

   /**
    * @brief Destructor, destroying the internal reader instance.
-   *
-   * Since the declaration of the internal `reader` object does not exist in this header, this
-   * destructor needs to be defined in a separate source file which can access to that object's
-   * declaration.
    */
   ~chunked_orc_reader();

@@ -1179,7 +1175,7 @@ class chunked_orc_writer_options {
    */
   void set_stripe_size_bytes(size_t size_bytes)
   {
-    // CUDF_EXPECTS(size_bytes >= 64 << 10, "64KB is the minimum stripe size");
+    CUDF_EXPECTS(size_bytes >= 64 << 10, "64KB is the minimum stripe size");
     _stripe_size_bytes = size_bytes;
   }

@@ -1195,7 +1191,7 @@ class chunked_orc_writer_options {
    */
   void set_stripe_size_rows(size_type size_rows)
   {
-    // CUDF_EXPECTS(size_rows >= 512, "maximum stripe size cannot be smaller than 512");
+    CUDF_EXPECTS(size_rows >= 512, "maximum stripe size cannot be smaller than 512");
     _stripe_size_rows = size_rows;
   }

diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 378a37ce859..e8dbf97abd6 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -448,11 +448,6 @@ void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream)
   }
 }

-/**
- * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type,
- *          std::vector>&&, orc_reader_options const&,
- *          rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
- */
 chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit,
                                        std::size_t data_read_limit,
                                        size_type output_row_granularity,
@@ -469,11 +464,6 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit,
 {
 }

-/**
- * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t,
- *          std::vector>&&, orc_reader_options const&,
- *          rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
- */
 chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit,
                                        std::size_t data_read_limit,
                                        orc_reader_options const& options,
@@ -488,6 +478,6 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit,
 {
 }

-/**
- * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t,
- *          std::vector>&&, orc_reader_options const&,
- *          rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
- */
 chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit,
                                        orc_reader_options const& options,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
   : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr)
 {
 }

-/**
- * @copydoc cudf::io::chunked_orc_reader::~chunked_orc_reader
- */
+// This destructor destroys the internal reader instance.
+// Since the declaration of the internal `reader` object does not exist in the header, this
+// destructor needs to be defined in a separate source file which can access that object's
+// declaration.
chunked_orc_reader::~chunked_orc_reader() = default; -/** - * @copydoc cudf::io::chunked_orc_reader::has_next - */ bool chunked_orc_reader::has_next() const { CUDF_FUNC_RANGE(); @@ -516,9 +499,6 @@ bool chunked_orc_reader::has_next() const return reader->has_next(); } -/** - * @copydoc cudf::io::chunked_orc_reader::read_chunk - */ table_with_metadata chunked_orc_reader::read_chunk() const { CUDF_FUNC_RANGE(); diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 1e9cb50d532..9ec4488cbf2 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -168,10 +168,6 @@ aggregate_orc_metadata::select_stripes( }(); struct stripe_source_mapping { - stripe_source_mapping(int source_idx, std::vector&& stripe_info) - : source_idx(source_idx), stripe_info(std::move(stripe_info)) - { - } int source_idx; std::vector stripe_info; }; @@ -206,8 +202,8 @@ aggregate_orc_metadata::select_stripes( (int)rows_to_read); printf(" stripe to read: %d-%d\n", (int)src_file_idx, (int)stripe_idx); } - selected_stripes_mapping.emplace_back(static_cast(src_file_idx), - std::move(stripe_infos)); + selected_stripes_mapping.emplace_back( + stripe_source_mapping{static_cast(src_file_idx), std::move(stripe_infos)}); } } else { int64_t count = 0; @@ -232,8 +228,8 @@ aggregate_orc_metadata::select_stripes( } } - selected_stripes_mapping.emplace_back(static_cast(src_file_idx), - std::move(stripe_infos)); + selected_stripes_mapping.emplace_back( + stripe_source_mapping{static_cast(src_file_idx), std::move(stripe_infos)}); } // Need to remove skipped rows from the stripes which are not selected. rows_to_skip -= stripe_skip_rows; diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 79bcaae25f1..5ffff3d7d40 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -42,7 +42,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : reader() // TODO + : reader() { _impl = std::make_unique( output_size_limit, data_read_limit, std::move(sources), options, stream, mr); @@ -55,7 +55,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : reader() // TODO + : reader() { CUDF_EXPECTS(output_row_granularity > 0, "Invalid value of `output_row_granularity`."); _impl = std::make_unique(output_size_limit, diff --git a/cpp/src/io/utilities/row_selection.cpp b/cpp/src/io/utilities/row_selection.cpp index f136cd11ff7..d91791b3371 100644 --- a/cpp/src/io/utilities/row_selection.cpp +++ b/cpp/src/io/utilities/row_selection.cpp @@ -26,17 +26,17 @@ namespace cudf::io::detail { std::pair skip_rows_num_rows_from_options( int64_t skip_rows, std::optional const& num_rows, int64_t num_source_rows) { - auto const rows_to_skip = std::min(skip_rows, num_source_rows); + auto const rows_to_skip = std::min(skip_rows, num_source_rows); + auto const num_rows_can_read = num_source_rows - rows_to_skip; + if (not num_rows.has_value()) { - CUDF_EXPECTS(num_source_rows - rows_to_skip <= std::numeric_limits::max(), - "The requested number of rows exceeds the column size limit", + CUDF_EXPECTS(num_rows_can_read <= static_cast(std::numeric_limits::max()), + "The requested number of rows exceeds the column size limit.", std::overflow_error); - return {rows_to_skip, num_source_rows - rows_to_skip}; + return {rows_to_skip, 
static_cast(num_rows_can_read)}; } // Limit the number of rows to the end of the input - return { - rows_to_skip, - static_cast(std::min(num_rows.value(), num_source_rows - rows_to_skip))}; + return {rows_to_skip, std::min(num_rows.value(), static_cast(num_rows_can_read))}; } } // namespace cudf::io::detail From d071f46c4b30941e10bb6c715b08c58030dc8d35 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 19:42:16 -0800 Subject: [PATCH 174/321] Support 64bit size for `rows_to_read` Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 28 ++++++++++++++-------- cpp/src/io/orc/aggregate_orc_metadata.hpp | 2 +- cpp/src/io/orc/reader_impl.cu | 13 ++++++---- cpp/src/io/orc/reader_impl.hpp | 13 ++++++---- cpp/src/io/orc/reader_impl_chunking.cu | 9 ++++++- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- cpp/src/io/orc/reader_impl_decode.cu | 10 ++++++-- cpp/src/io/parquet/reader_impl_helpers.cpp | 5 ++-- cpp/src/io/utilities/row_selection.cpp | 15 +++++------- cpp/src/io/utilities/row_selection.hpp | 5 ++-- 10 files changed, 65 insertions(+), 37 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 9ec4488cbf2..0a270877154 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -152,7 +152,7 @@ aggregate_orc_metadata::aggregate_orc_metadata( } } -std::tuple> +std::tuple> aggregate_orc_metadata::select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, @@ -163,7 +163,7 @@ aggregate_orc_metadata::select_stripes( "Can't use both the row selection and the stripe selection"); auto [rows_to_skip, rows_to_read] = [&]() { - if (not user_specified_stripes.empty()) { return std::pair{0, 0}; } + if (not user_specified_stripes.empty()) { return std::pair{0, 0}; } return cudf::io::detail::skip_rows_num_rows_from_options(skip_rows, num_rows, get_num_rows()); }(); @@ -194,12 +194,15 @@ aggregate_orc_metadata::select_stripes( nullptr, static_cast(src_file_idx)}); - // TODO: change return type to int64_t - rows_to_read += static_cast( - per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows); - printf(" rows_to_read : %d / %d\n", - (int)per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows, - (int)rows_to_read); + auto const stripe_rows = + per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; + CUDF_EXPECTS(stripe_rows < static_cast(std::numeric_limits::max()), + "The number of rows in one stripe exceeds the column size limit.", + std::overflow_error); + rows_to_read += static_cast(stripe_rows); + + // TODO: remove below + printf(" rows_to_read : %d / %d\n", (int)stripe_rows, (int)rows_to_read); printf(" stripe to read: %d-%d\n", (int)src_file_idx, (int)stripe_idx); } selected_stripes_mapping.emplace_back( @@ -217,8 +220,13 @@ aggregate_orc_metadata::select_stripes( for (size_t stripe_idx = 0; stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size() && count < rows_to_skip + rows_to_read; ++stripe_idx) { - count += - static_cast(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows); + auto const stripe_rows = + per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; + CUDF_EXPECTS(stripe_rows < static_cast(std::numeric_limits::max()), + "The number of rows in one stripe exceeds the column size limit.", + std::overflow_error); + count += static_cast(stripe_rows); + if (count > rows_to_skip || count == 0) { 
stripe_infos.push_back({&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr, diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 62d7a6a5c3f..613c08fb745 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -113,7 +113,7 @@ class aggregate_orc_metadata { * * Stripes are potentially selected from multiple files. */ - [[nodiscard]] std::tuple> select_stripes( + [[nodiscard]] std::tuple> select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, std::optional const& num_rows, diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d4ddbea347c..43d3a2d38f8 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -61,7 +61,8 @@ namespace cudf::io::orc::detail { void reader::impl::prepare_data(int64_t skip_rows, std::optional const& num_rows_opt, - std::vector> const& stripes) + std::vector> const& stripes, + read_mode mode) { // Selected columns at different levels of nesting are stored in different elements // of `selected_columns`; thus, size == 1 means no nested columns @@ -73,7 +74,7 @@ void reader::impl::prepare_data(int64_t skip_rows, std::cout << "call global, skip = " << skip_rows << std::endl; - global_preprocess(skip_rows, num_rows_opt, stripes); + global_preprocess(skip_rows, num_rows_opt, stripes, mode); if (!_chunk_read_data.more_table_chunk_to_output()) { if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) { @@ -272,14 +273,15 @@ table_with_metadata reader::impl::read(int64_t skip_rows, std::optional const& num_rows_opt, std::vector> const& stripes) { - prepare_data(skip_rows, num_rows_opt, stripes); + prepare_data(skip_rows, num_rows_opt, stripes, read_mode::READ_ALL); return make_output_chunk(); } bool reader::impl::has_next() { printf("==================query has next \n"); - prepare_data(_config.skip_rows, _config.num_read_rows, _config.selected_stripes); + prepare_data( + _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); printf("has next: %d\n", (int)_chunk_read_data.has_next()); return _chunk_read_data.has_next(); @@ -313,7 +315,8 @@ table_with_metadata reader::impl::read_chunk() #endif } - prepare_data(_config.skip_rows, _config.num_read_rows, _config.selected_stripes); + prepare_data( + _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); { _stream.synchronize(); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index de1d0ed68f5..48257659ebb 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -122,6 +122,9 @@ class reader::impl { table_with_metadata read_chunk(); private: + // TODO + enum class read_mode { READ_ALL, CHUNKED_READ }; + /** * @brief Perform all the necessary data preprocessing before creating an output table. 
   *
   * @param num_rows_opt Optional number of rows to read, or `std::nullopt` to read all rows
   * @param stripes Indices of individual stripes to load if non-empty
+   * @param mode Value indicating if the data sources are read all at once or chunk by chunk
    */
-  void prepare_data(int64_t skip_rows = 0,
-                    std::optional<int64_t> const& num_rows_opt = std::nullopt,
-                    std::vector<std::vector<size_type>> const& stripes = {});
+  void prepare_data(int64_t skip_rows,
+                    std::optional<int64_t> const& num_rows_opt,
+                    std::vector<std::vector<size_type>> const& stripes,
+                    read_mode mode);

   /**
    * @brief Perform a global preprocessing step that executes exactly once for the entire duration
    */
   void global_preprocess(uint64_t skip_rows,
                          std::optional<int64_t> const& num_rows_opt,
-                         std::vector<std::vector<size_type>> const& stripes);
+                         std::vector<std::vector<size_type>> const& stripes,
+                         read_mode mode);

   /**
    * @brief Load stripes from the input source and store the data in the internal buffers.
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 944f23e7764..e9b6e92e9cb 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -339,7 +339,8 @@ std::pair get_range(std::vector const& input_chunks,

 void reader::impl::global_preprocess(uint64_t skip_rows,
                                      std::optional<int64_t> const& num_rows_opt,
-                                     std::vector<std::vector<size_type>> const& stripes)
+                                     std::vector<std::vector<size_type>> const& stripes,
+                                     read_mode mode)
 {
   if (_file_itm_data.global_preprocessed) { return; }
   _file_itm_data.global_preprocessed = true;
@@ -350,6 +351,12 @@ void reader::impl::global_preprocess(uint64_t skip_rows,
     _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream);
   if (_file_itm_data.has_no_data()) { return; }

+  CUDF_EXPECTS(
+    mode == read_mode::CHUNKED_READ ||
+      _file_itm_data.rows_to_read <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
+    "Number of rows to read exceeds the column size limit in READ_ALL mode.",
+    std::overflow_error);
+
   printf("input skip rows: %d, num rows: %d\n", (int)skip_rows, (int)num_rows_opt.value_or(-1));
   printf("actual skip rows: %d, num rows: %d\n",
          (int)_file_itm_data.rows_to_skip,
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 0769f46f1d1..f67407d3671 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -123,7 +123,7 @@ struct range {
  */
 struct file_intermediate_data {
   int64_t rows_to_skip;
-  size_type rows_to_read;
+  int64_t rows_to_read;
   std::vector selected_stripes;

   // Return true if no rows or stripes to read.
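+
+  // Note: `rows_to_read` is 64-bit because the rows of all selected stripes may
+  // together exceed the `size_type` column size limit; each decoded table chunk
+  // is still checked against that limit before decoding.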
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 4971f65debb..327a3124ebe 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -824,15 +824,17 @@ void reader::impl::decompress_and_decode() auto const& selected_stripes = _file_itm_data.selected_stripes; // auto const rows_to_skip = 0; - auto rows_to_read = 0; + int64_t rows_to_read = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; + // TODO: this is indeed not needed since we split stripes before this based on stripe row + // TODO: check overflow // CUDF_EXPECTS(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows < // static_cast(std::numeric_limits::max()), // "TODO"); - rows_to_read += static_cast(stripe_info->numberOfRows); + rows_to_read += static_cast(stripe_info->numberOfRows); if (_file_itm_data.rows_to_skip > 0) { CUDF_EXPECTS(_file_itm_data.rows_to_skip < static_cast(stripe_info->numberOfRows), @@ -842,6 +844,10 @@ void reader::impl::decompress_and_decode() rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); _file_itm_data.rows_to_skip = 0; + CUDF_EXPECTS(rows_to_read <= static_cast(std::numeric_limits::max()), + "Number or rows to decode exceeds the column size limit.", + std::overflow_error); + // Set up table for converting timestamp columns from local to UTC time auto const tz_table = [&, &selected_stripes = selected_stripes] { auto const has_timestamp_column = std::any_of( diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 6f11debb8df..7ab6b2cdd26 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -461,8 +461,9 @@ aggregate_reader_metadata::select_row_groups( auto [rows_to_skip, rows_to_read] = [&]() { if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( - skip_rows_opt, num_rows_opt, get_num_rows()); - return std::pair{static_cast(from_opts.first), from_opts.second}; + skip_rows_opt, std::optional{num_rows_opt.value()}, get_num_rows()); + return std::pair{static_cast(from_opts.first), + static_cast(from_opts.second)}; }(); if (!row_group_indices.empty()) { diff --git a/cpp/src/io/utilities/row_selection.cpp b/cpp/src/io/utilities/row_selection.cpp index d91791b3371..c0bbca39167 100644 --- a/cpp/src/io/utilities/row_selection.cpp +++ b/cpp/src/io/utilities/row_selection.cpp @@ -23,20 +23,17 @@ namespace cudf::io::detail { -std::pair skip_rows_num_rows_from_options( - int64_t skip_rows, std::optional const& num_rows, int64_t num_source_rows) +std::pair skip_rows_num_rows_from_options(int64_t skip_rows, + std::optional const& num_rows, + int64_t num_source_rows) { auto const rows_to_skip = std::min(skip_rows, num_source_rows); auto const num_rows_can_read = num_source_rows - rows_to_skip; - if (not num_rows.has_value()) { - CUDF_EXPECTS(num_rows_can_read <= static_cast(std::numeric_limits::max()), - "The requested number of rows exceeds the column size limit.", - std::overflow_error); - return {rows_to_skip, static_cast(num_rows_can_read)}; - } + if (not num_rows.has_value()) { return {rows_to_skip, num_rows_can_read}; } + // Limit the number of rows to the end of the input - return {rows_to_skip, std::min(num_rows.value(), static_cast(num_rows_can_read))}; + return 
{rows_to_skip, std::min(num_rows.value(), num_rows_can_read)}; } } // namespace cudf::io::detail diff --git a/cpp/src/io/utilities/row_selection.hpp b/cpp/src/io/utilities/row_selection.hpp index 0b5d3aef8bd..7fdcc65d77b 100644 --- a/cpp/src/io/utilities/row_selection.hpp +++ b/cpp/src/io/utilities/row_selection.hpp @@ -34,7 +34,8 @@ namespace cudf::io::detail { * * @throw std::overflow_exception The requested number of rows exceeds the column size limit */ -std::pair skip_rows_num_rows_from_options( - int64_t skip_rows, std::optional const& num_rows, int64_t num_source_rows); +std::pair skip_rows_num_rows_from_options(int64_t skip_rows, + std::optional const& num_rows, + int64_t num_source_rows); } // namespace cudf::io::detail From 388adb3985ce7b94e55b22679c57585ab3956979 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 20:37:20 -0800 Subject: [PATCH 175/321] Implement `cumulative_size_and_row` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 61 +++++++++++++++++-------- cpp/src/io/orc/reader_impl_chunking.hpp | 18 ++++++-- cpp/src/io/orc/reader_impl_decode.cu | 2 +- 3 files changed, 59 insertions(+), 22 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index e9b6e92e9cb..2bd98ce7c42 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -191,9 +191,8 @@ std::size_t gather_stream_info_and_column_desc( * @brief Find the splits of the input data such that each split has cumulative size less than a * given `size_limit`. */ -std::vector find_splits(host_span sizes, - int64_t total_count, - size_t size_limit) +template +std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit) { // if (size_limit == 0) { // printf("0 limit: output chunk = 0, %d\n", (int)total_count); @@ -251,6 +250,12 @@ std::vector find_splits(host_span sizes, return splits; } + +template std::vector find_splits(host_span sizes, + int64_t total_count, + size_t size_limit); +template std::vector find_splits( + host_span sizes, int64_t total_count, size_t size_limit); #endif namespace { @@ -528,7 +533,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, chunk_read_data::load_limit_ratio); return tmp > 0UL ? tmp : 1UL; }(); - _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, load_limit); + _chunk_read_data.load_stripe_chunks = + find_splits(total_stripe_sizes, num_stripes, load_limit); #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.load_stripe_chunks; @@ -557,11 +563,10 @@ void reader::impl::load_data() if (_file_itm_data.has_no_data()) { return; } // auto const rows_to_read = _file_itm_data.rows_to_read; - // auto const& selected_stripes = _file_itm_data.selected_stripes; - - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; - auto& read_info = _file_itm_data.data_read_info; + auto const& selected_stripes = _file_itm_data.selected_stripes; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; + auto& read_info = _file_itm_data.data_read_info; // std::size_t num_stripes = selected_stripes.size(); auto const stripe_chunk = @@ -625,8 +630,17 @@ void reader::impl::load_data() // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
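+  // This map ties each stream to its parsed compression info, so that the
+  // decompressed sizes computed for the size estimation below can be reused
+  // later when decoding.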
stream_id_map stream_compinfo_map; - cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, _stream); - std::fill(stripe_decomp_sizes.begin(), stripe_decomp_sizes.end(), cumulative_size{1, 0}); + cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, + _stream); + for (int64_t stripe_idx = 0; stripe_idx < stripe_chunk.count; ++stripe_idx) { + auto const& stripe = selected_stripes[stripe_idx]; + auto const stripe_info = stripe.stripe_info; + + stripe_decomp_sizes[stripe_idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; + // printf("loading stripe with rows = %d\n", (int)stripe_info->numberOfRows); + } + // std::fill( + // stripe_decomp_sizes.begin(), stripe_decomp_sizes.end(), cumulative_size_and_row{1, 0, 0}); // Parse the decompressed sizes for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { @@ -735,6 +749,14 @@ void reader::impl::load_data() return; } + { + int count{0}; + for (auto& size : stripe_decomp_sizes) { + printf("decomp stripe size: %ld, %zu, %zu\n", size.count, size.size_bytes, size.rows); + if (count++ > 5) break; + } + } + // Compute the prefix sum of stripe data sizes. stripe_decomp_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), @@ -745,23 +767,26 @@ void reader::impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); + { + int count{0}; + for (auto& size : stripe_decomp_sizes) { + printf( + "prefix sum decomp stripe size: %ld, %zu, %zu\n", size.count, size.size_bytes, size.rows); + if (count++ > 5) break; + } + } + auto const decode_limit = [&] { auto const tmp = static_cast(_chunk_read_data.data_read_limit * (1.0 - chunk_read_data::load_limit_ratio)); return tmp > 0UL ? tmp : 1UL; }(); _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_limit); + find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_limit); for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { chunk.start_idx += stripe_chunk.start_idx; } - int count{0}; - for (auto& size : stripe_decomp_sizes) { - printf("decomp size: %ld, %zu\n", size.count, size.size_bytes); - if (count++ > 5) break; - } - #ifndef PRINT_DEBUG auto& splits = _chunk_read_data.decode_stripe_chunks; printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_chunk.count); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index f67407d3671..61b27ff7c54 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -252,6 +252,13 @@ struct cumulative_size { std::size_t size_bytes{0}; }; +// TODO +struct cumulative_size_and_row { + int64_t count{0}; + std::size_t size_bytes{0}; + std::size_t rows{0}; +}; + /** * @brief Functor to sum up cumulative sizes. */ @@ -260,15 +267,20 @@ struct cumulative_size_sum { { return cumulative_size{a.count + b.count, a.size_bytes + b.size_bytes}; } + + __device__ cumulative_size_and_row operator()(cumulative_size_and_row const& a, + cumulative_size_and_row const& b) const + { + return cumulative_size_and_row{a.count + b.count, a.size_bytes + b.size_bytes, a.rows + b.rows}; + } }; /** * @brief Find the splits of the input data such that each split has cumulative size less than a * given `size_limit`. 
*/ -std::vector find_splits(host_span sizes, - int64_t total_count, - size_t size_limit); +template +std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit); // TODO std::pair get_range(std::vector const& input_chunks, diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 327a3124ebe..a0abf546a22 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -799,7 +799,7 @@ std::vector find_table_splits(table_view const& input, segmented_sizes.device_to_host_sync(stream); // Since the segment sizes are in bits, we need to multiply CHAR_BIT with the output limit. - return find_splits(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT); + return find_splits(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT); } } // namespace From 7e451abafa68b5e9baebc3e49e8d7595c108c07a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Mar 2024 21:36:40 -0800 Subject: [PATCH 176/321] Split if num rows exceeds size limit Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2bd98ce7c42..2a6324da676 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -205,6 +205,8 @@ std::vector find_splits(host_span sizes, int64_t total_count, si int64_t cur_pos{0}; size_t cur_cumulative_size{0}; + [[maybe_unused]] size_t cur_cumulative_rows{0}; + auto const start = thrust::make_transform_iterator( sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); auto const end = start + static_cast(sizes.size()); @@ -220,13 +222,20 @@ std::vector find_splits(host_span sizes, int64_t total_count, si split_pos--; } + if constexpr (std::is_same_v) { + while (split_pos > 0 && sizes[split_pos].rows - cur_cumulative_rows > + static_cast(std::numeric_limits::max())) { + split_pos--; + } + } + // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in // a loop because all of the cumulative sizes for all the pages are sorted into one big list. // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in // the list twice. so we have to iterate until we skip past all of them. The idea is that we // either do this, or we have to call unique() on the input first. 
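+    // Note on the relaxed condition below: the row-limit walk-back added above may
+    // leave `split_pos` at an entry whose `count` is smaller than `cur_count`, so
+    // the advance condition uses `<=` instead of `==`.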
while (split_pos < (static_cast(sizes.size()) - 1) && - (split_pos < 0 || sizes[split_pos].count == cur_count)) { + (split_pos < 0 || sizes[split_pos].count <= cur_count)) { split_pos++; } @@ -235,6 +244,10 @@ std::vector find_splits(host_span sizes, int64_t total_count, si splits.emplace_back(chunk{start_idx, static_cast(cur_count - start_idx)}); cur_pos = split_pos; cur_cumulative_size = sizes[split_pos].size_bytes; + + if constexpr (std::is_same_v) { + cur_cumulative_rows = sizes[split_pos].rows; + } } // If the last chunk has size smaller than `merge_threshold` percent of the second last one, From 758e2d0f81fd2060fb2e3046efa5e9432edf71d4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 09:03:21 -0800 Subject: [PATCH 177/321] Add test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 111 +++++++++++++++--------- 1 file changed, 72 insertions(+), 39 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 2857b82d415..7ef9d72d348 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1342,15 +1342,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } -#define LOCAL_TEST - -// This test is extremely heavy, thus it should be disabled by default. -#ifdef LOCAL_TEST TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) { int64_t constexpr num_rows = 500'000'000l; int constexpr rows_per_stripe = 1'000'000; - int constexpr num_reps = 10l; + int constexpr num_reps = 10; int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); @@ -1371,46 +1367,83 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } } + printf("size: %zu\n", data_buffer.size()); + // Verify metadata. auto const metadata = cudf::io::read_orc_metadata(cudf::io::source_info{data_buffer.data(), data_buffer.size()}); EXPECT_EQ(metadata.num_rows(), total_rows); EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); - int constexpr num_rows_to_read = 5'000'000; - const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read - - 123456 /*just shift the read data region back by a random offset*/; - - // Check validity of the last 5 million rows. - const auto sequence_start = num_rows_to_skip % num_rows; - auto const skipped_col = int64s_col(it + sequence_start, it + sequence_start + num_rows_to_read); - auto const expected = cudf::table_view{{skipped_col}}; - - auto const read_opts = cudf::io::orc_reader_options::builder( - cudf::io::source_info{data_buffer.data(), data_buffer.size()}) - .use_index(false) - .skip_rows(num_rows_to_skip) - .num_rows(num_rows_to_read) - .build(); - auto reader = cudf::io::chunked_orc_reader( - 600'000UL * sizeof(int64_t) /*output limit, equal to 600k int64_t rows */, - 8'000'000UL /*input limit, around size of 1 stripe's decoded data */, - 500'000 /*output granularity, or minimum number of rows for the output chunk*/, - read_opts); - - auto num_chunks = 0; - auto read_tables = std::vector>{}; - auto tviews = std::vector{}; + // Read with row selections and memory limit. + { + int constexpr num_rows_to_read = 5'000'000; + const auto num_rows_to_skip = + metadata.num_rows() - num_rows_to_read - + 123456 /*just shift the read data region back by a random offset*/; + + // Check validity of the last 5 million rows. 
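+    // The file written above repeats the same `num_rows`-long sequence `num_reps` times, so
+    // the expected value at any absolute row is recovered by wrapping the skip offset back
+    // into [0, num_rows) -- which is what `sequence_start` below computes.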
+ const auto sequence_start = num_rows_to_skip % num_rows; + auto const skipped_col = + int64s_col(it + sequence_start, it + sequence_start + num_rows_to_read); + auto const expected = cudf::table_view{{skipped_col}}; + + auto const read_opts = cudf::io::orc_reader_options::builder( + cudf::io::source_info{data_buffer.data(), data_buffer.size()}) + .use_index(false) + .skip_rows(num_rows_to_skip) + .num_rows(num_rows_to_read) + .build(); + auto reader = cudf::io::chunked_orc_reader( + 600'000UL * sizeof(int64_t) /* output limit, equal to 600k int64_t rows */, + 8'000'000UL /* input limit, around size of 1 stripe's decoded data */, + 500'000 /* output granularity, or minimum number of rows for the output chunk */, + read_opts); + + auto num_chunks = 0; + auto read_tables = std::vector>{}; + auto tviews = std::vector{}; + + do { + auto chunk = reader.read_chunk(); + ++num_chunks; + tviews.emplace_back(chunk.tbl->view()); + read_tables.emplace_back(std::move(chunk.tbl)); + } while (reader.has_next()); + + auto const read_result = cudf::concatenate(tviews); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); + } + + // Read with only output limit. + // There is no limit on the memory usage. + // However, the reader should be able to detect and load only enough stripes each time + // to avoid decoding a table having number of rows that exceeds the column size limit. + { + auto const read_opts = cudf::io::orc_reader_options::builder( + cudf::io::source_info{data_buffer.data(), data_buffer.size()}) + .use_index(false) + .build(); + auto reader = cudf::io::chunked_orc_reader( + 600'000UL * sizeof(int64_t) /* output limit, equal to 600k int64_t rows */, + 0UL /* no input limit */, + 500'000 /* output granularity, or minimum number of rows for the output chunk */, + read_opts); + + auto num_chunks = 0; + auto read_tables = std::vector>{}; + auto tviews = std::vector{}; + + do { + auto chunk = reader.read_chunk(); + ++num_chunks; + tviews.emplace_back(chunk.tbl->view()); + read_tables.emplace_back(std::move(chunk.tbl)); + } while (reader.has_next()); - do { - auto chunk = reader.read_chunk(); - ++num_chunks; - tviews.emplace_back(chunk.tbl->view()); - read_tables.emplace_back(std::move(chunk.tbl)); - } while (reader.has_next()); + EXPECT_EQ(num_chunks, 10); - auto const read_result = cudf::concatenate(tviews); - EXPECT_EQ(num_chunks, 10); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); + // Verify only the last chunk. 
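+    // One possible shape for that verification (a sketch only; assumes the final chunk does
+    // not straddle a boundary between two repetitions of the written sequence):
+    //   auto const& last      = tviews.back();
+    //   auto const last_start = (total_rows - last.num_rows()) % num_rows;
+    //   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    //     int64s_col(it + last_start, it + last_start + last.num_rows()), last.column(0));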
+ } } -#endif From 5de81792ffb18d84ca6f59efcfb9e7153cc0cd3f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 09:27:21 -0800 Subject: [PATCH 178/321] Changing skip and num rows Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 21 ++++++++++----------- cpp/src/io/orc/aggregate_orc_metadata.cpp | 4 ---- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/reader_impl.hpp | 12 ++++++------ cpp/src/io/orc/reader_impl_chunking.cu | 15 ++++++++------- cpp/tests/io/orc_chunked_reader_test.cu | 3 ++- 6 files changed, 28 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 99580bd9886..8bf5baef97b 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -57,10 +57,10 @@ class orc_reader_options { // List of individual stripes to read (ignored if empty) std::vector> _stripes; - // Rows to skip from the start; ORC stores the number of rows as uint64_t - uint64_t _skip_rows = 0; + // Rows to skip from the start + int64_t _skip_rows = 0; // Rows to read; `nullopt` is all - std::optional _num_rows; + std::optional _num_rows; // Whether to use row index to speed-up reading bool _use_index = true; @@ -124,7 +124,7 @@ class orc_reader_options { * * @return Number of rows to skip from the start */ - uint64_t get_skip_rows() const { return _skip_rows; } + int64_t get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of row to read. @@ -132,7 +132,7 @@ class orc_reader_options { * @return Number of rows to read; `nullopt` if the option hasn't been set (in which case the file * is read until the end) */ - std::optional const& get_num_rows() const { return _num_rows; } + std::optional const& get_num_rows() const { return _num_rows; } /** * @brief Whether to use row index to speed-up reading. @@ -197,10 +197,9 @@ class orc_reader_options { * @throw cudf::logic_error if a negative value is passed * @throw cudf::logic_error if stripes have been previously set */ - void set_skip_rows(uint64_t rows) + void set_skip_rows(int64_t rows) { CUDF_EXPECTS(rows == 0 or _stripes.empty(), "Can't set both skip_rows along with stripes"); - CUDF_EXPECTS(rows <= std::numeric_limits::max(), "skip_rows is too large"); _skip_rows = rows; } @@ -212,7 +211,7 @@ class orc_reader_options { * @throw cudf::logic_error if a negative value is passed * @throw cudf::logic_error if stripes have been previously set */ - void set_num_rows(size_type nrows) + void set_num_rows(int64_t nrows) { CUDF_EXPECTS(nrows >= 0, "num_rows cannot be negative"); CUDF_EXPECTS(_stripes.empty(), "Can't set both num_rows and stripes"); @@ -270,7 +269,7 @@ class orc_reader_options_builder { * * @param src The source information used to read orc file */ - explicit orc_reader_options_builder(source_info src) : options{std::move(src)} {}; + explicit orc_reader_options_builder(source_info src) : options{std::move(src)} {} /** * @brief Sets names of the column to read. 
@@ -302,7 +301,7 @@ class orc_reader_options_builder { * @param rows Number of rows * @return this for chaining */ - orc_reader_options_builder& skip_rows(uint64_t rows) + orc_reader_options_builder& skip_rows(int64_t rows) { options.set_skip_rows(rows); return *this; @@ -314,7 +313,7 @@ class orc_reader_options_builder { * @param nrows Number of rows * @return this for chaining */ - orc_reader_options_builder& num_rows(size_type nrows) + orc_reader_options_builder& num_rows(int64_t nrows) { options.set_num_rows(nrows); return *this; diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 0a270877154..15049b8b732 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -200,10 +200,6 @@ aggregate_orc_metadata::select_stripes( "The number of rows in one stripe exceeds the column size limit.", std::overflow_error); rows_to_read += static_cast(stripe_rows); - - // TODO: remove below - printf(" rows_to_read : %d / %d\n", (int)stripe_rows, (int)rows_to_read); - printf(" stripe to read: %d-%d\n", (int)src_file_idx, (int)stripe_idx); } selected_stripes_mapping.emplace_back( stripe_source_mapping{static_cast(src_file_idx), std::move(stripe_infos)}); diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 43d3a2d38f8..e36d00a1fa6 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -60,7 +60,7 @@ namespace cudf::io::orc::detail { void reader::impl::prepare_data(int64_t skip_rows, - std::optional const& num_rows_opt, + std::optional const& num_rows_opt, std::vector> const& stripes, read_mode mode) { @@ -270,7 +270,7 @@ reader::impl::impl(std::size_t output_size_limit, } table_with_metadata reader::impl::read(int64_t skip_rows, - std::optional const& num_rows_opt, + std::optional const& num_rows_opt, std::vector> const& stripes) { prepare_data(skip_rows, num_rows_opt, stripes, read_mode::READ_ALL); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 48257659ebb..853055f50ed 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -108,7 +108,7 @@ class reader::impl { * @return The set of columns along with metadata */ table_with_metadata read(int64_t skip_rows, - std::optional const& num_rows_opt, + std::optional const& num_rows_opt, std::vector> const& stripes); /** @@ -136,7 +136,7 @@ class reader::impl { * @param stripes Indices of individual stripes to load if non-empty */ void prepare_data(int64_t skip_rows, - std::optional const& num_rows_opt, + std::optional const& num_rows_opt, std::vector> const& stripes, read_mode mode); @@ -152,8 +152,8 @@ class reader::impl { * stripes for reading each time using the `load_data()` step. This is to ensure that loading * these stripes will not exceed a fixed portion the data read limit. */ - void global_preprocess(uint64_t skip_rows, - std::optional const& num_rows_opt, + void global_preprocess(int64_t skip_rows, + std::optional const& num_rows_opt, std::vector> const& stripes, read_mode mode); @@ -210,8 +210,8 @@ class reader::impl { std::vector decimal128_columns; // control decimals conversion // User specified reading rows/stripes selection. 
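  // Note on the types below: signed 64-bit (rather than size_type or uint64_t) presumably
  // lets the reader address inputs with more than 2^31 - 1 rows while keeping subtraction in
  // the row-clamping code well behaved; it mirrors the option types changed in orc.hpp above.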
- uint64_t const skip_rows; - std::optional num_read_rows; + int64_t const skip_rows; + std::optional num_read_rows; std::vector> const selected_stripes; } const _config; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2a6324da676..340b85b83e3 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -355,8 +355,8 @@ std::pair get_range(std::vector const& input_chunks, return {begin, end}; } -void reader::impl::global_preprocess(uint64_t skip_rows, - std::optional const& num_rows_opt, +void reader::impl::global_preprocess(int64_t skip_rows, + std::optional const& num_rows_opt, std::vector> const& stripes, read_mode mode) { @@ -375,10 +375,10 @@ void reader::impl::global_preprocess(uint64_t skip_rows, "Number or rows to read exceeds the column size limit in READ_ALL mode.", std::overflow_error); - printf("input skip rows: %d, num rows: %d\n", (int)skip_rows, (int)num_rows_opt.value_or(-1)); - printf("actual skip rows: %d, num rows: %d\n", - (int)_file_itm_data.rows_to_skip, - (int)_file_itm_data.rows_to_read); + printf("input skip rows: %lu, num rows: %lu\n", skip_rows, num_rows_opt.value_or(-1)); + printf("actual skip rows: %lu, num rows: %lu\n", + _file_itm_data.rows_to_skip, + _file_itm_data.rows_to_read); // auto const rows_to_skip = _file_itm_data.rows_to_skip; // auto const rows_to_read = _file_itm_data.rows_to_read; @@ -508,7 +508,8 @@ void reader::impl::global_preprocess(uint64_t skip_rows, _chunk_read_data.curr_load_stripe_chunk = 0; // Load all chunks if there is no read limit. - if (_chunk_read_data.data_read_limit == 0) { + if (_chunk_read_data.data_read_limit == 0 && + _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; return; diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 7ef9d72d348..30cf854b349 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1376,7 +1376,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); // Read with row selections and memory limit. - { + if (0) { int constexpr num_rows_to_read = 5'000'000; const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read - @@ -1416,6 +1416,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } + if (1) // Read with only output limit. // There is no limit on the memory usage. 
// However, the reader should be able to detect and load only enough stripes each time From 31f6b6d2c5b6708a2c986108d67705c64bab3e9c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 11:28:16 -0800 Subject: [PATCH 179/321] Fix test Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 ++++++++++++++++++------ cpp/tests/io/orc_chunked_reader_test.cu | 18 ++++++++++-------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 340b85b83e3..9c4cc7cecd4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -375,8 +375,8 @@ void reader::impl::global_preprocess(int64_t skip_rows, "Number or rows to read exceeds the column size limit in READ_ALL mode.", std::overflow_error); - printf("input skip rows: %lu, num rows: %lu\n", skip_rows, num_rows_opt.value_or(-1)); - printf("actual skip rows: %lu, num rows: %lu\n", + printf("input skip rows: %ld, num rows: %ld\n", skip_rows, num_rows_opt.value_or(-1l)); + printf("actual skip rows: %ld, num rows: %ld\n", _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read); @@ -508,8 +508,7 @@ void reader::impl::global_preprocess(int64_t skip_rows, _chunk_read_data.curr_load_stripe_chunk = 0; // Load all chunks if there is no read limit. - if (_chunk_read_data.data_read_limit == 0 && - _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { + if (_chunk_read_data.data_read_limit == 0) { printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; return; @@ -541,7 +540,7 @@ void reader::impl::global_preprocess(int64_t skip_rows, if (count > 5) break; } - // TODO: handle case for extremely large files. + // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits. auto const load_limit = [&] { auto const tmp = static_cast(_chunk_read_data.data_read_limit * chunk_read_data::load_limit_ratio); @@ -758,11 +757,18 @@ void reader::impl::load_data() // That is because the estimated `max_uncompressed_size` of stream data from // `ParseCompressedStripeData` is just the approximate of the maximum possible size, not the // actual size, which can be much smaller in practice. - if (_chunk_read_data.data_read_limit == 0) { + + // TODO: docs on handle size overflow + if (_chunk_read_data.data_read_limit == 0 && + _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { + printf("0 limit: output decode stripe chunk unchanged\n"); _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; return; } + // TODO: remove + if (_chunk_read_data.data_read_limit == 0) { printf("0 limit but size overflow\n"); } + { int count{0}; for (auto& size : stripe_decomp_sizes) { @@ -791,6 +797,12 @@ void reader::impl::load_data() } auto const decode_limit = [&] { + // In this case, we have no read limit but have to split due to having large input in which + // the number of rows exceed column size limit. + // We will split based on row number, not data size. + if (_chunk_read_data.data_read_limit == 0) { return std::numeric_limits::max(); } + + // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits. auto const tmp = static_cast(_chunk_read_data.data_read_limit * (1.0 - chunk_read_data::load_limit_ratio)); return tmp > 0UL ? 
tmp : 1UL; diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 30cf854b349..4d4b029d211 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1350,8 +1350,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = thrust::make_counting_iterator(int64_t{0}); - auto const col = int64s_col(it, it + num_rows); + using data_col = cudf::test::fixed_width_column_wrapper; + + auto const it = + cudf::detail::make_counting_transform_iterator(0l, [](int64_t i) { return i % 123456789l; }); + auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; @@ -1384,9 +1387,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) // Check validity of the last 5 million rows. const auto sequence_start = num_rows_to_skip % num_rows; - auto const skipped_col = - int64s_col(it + sequence_start, it + sequence_start + num_rows_to_read); - auto const expected = cudf::table_view{{skipped_col}}; + auto const skipped_col = data_col(it + sequence_start, it + sequence_start + num_rows_to_read); + auto const expected = cudf::table_view{{skipped_col}}; auto const read_opts = cudf::io::orc_reader_options::builder( cudf::io::source_info{data_buffer.data(), data_buffer.size()}) @@ -1395,8 +1397,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .num_rows(num_rows_to_read) .build(); auto reader = cudf::io::chunked_orc_reader( - 600'000UL * sizeof(int64_t) /* output limit, equal to 600k int64_t rows */, - 8'000'000UL /* input limit, around size of 1 stripe's decoded data */, + 600'000UL * sizeof(int32_t) /* output limit, equal to 600k int32_t rows */, + 4'000'000UL /* input limit, around size of 1 stripe's decoded data */, 500'000 /* output granularity, or minimum number of rows for the output chunk */, read_opts); @@ -1427,7 +1429,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .use_index(false) .build(); auto reader = cudf::io::chunked_orc_reader( - 600'000UL * sizeof(int64_t) /* output limit, equal to 600k int64_t rows */, + 600'000UL * sizeof(int32_t) /* output limit, equal to 600k int64_t rows */, 0UL /* no input limit */, 500'000 /* output granularity, or minimum number of rows for the output chunk */, read_opts); From 07a095a1267d0ed1e47f1397c3c73a0175b45416 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 12:53:01 -0800 Subject: [PATCH 180/321] Fix skip rows and num rows --- cpp/src/io/orc/reader_impl.cu | 7 ++++++- cpp/src/io/orc/reader_impl_chunking.cu | 1 + cpp/src/io/orc/reader_impl_chunking.hpp | 4 ++++ cpp/src/io/orc/reader_impl_decode.cu | 23 +++++++++++++++++------ cpp/tests/io/orc_chunked_reader_test.cu | 22 ++++++++++++---------- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index e36d00a1fa6..0048a6fd1b9 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -110,7 +110,7 @@ table_with_metadata reader::impl::make_output_chunk() if (_selected_columns.num_levels() == 0) { return {std::make_unique
<table>(), table_metadata{}}; }

   // If no rows or stripes to read, return empty columns
-  if (_file_itm_data.has_no_data() || !_chunk_read_data.more_table_chunk_to_output()) {
+  if (!_chunk_read_data.more_table_chunk_to_output()) {
     printf("has no next\n");
@@ -159,6 +159,11 @@ table_with_metadata reader::impl::make_output_chunk()
               << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
   }
 
+  // If this is the last slice, we also delete the decoded_table to free up memory.
+  if (!_chunk_read_data.more_table_chunk_to_output()) {
+    _chunk_read_data.decoded_table.reset(nullptr);
+  }
+
   return std::make_unique<table>
(out_tview, _stream, _mr); }(); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9c4cc7cecd4..5157425ea44 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -760,6 +760,7 @@ void reader::impl::load_data() // TODO: docs on handle size overflow if (_chunk_read_data.data_read_limit == 0 && + // TODO: rows_to_read is changed every decode, should we change this? _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { printf("0 limit: output decode stripe chunk unchanged\n"); _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 61b27ff7c54..0aef5285ecf 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -240,6 +240,10 @@ struct chunk_read_data { // Only has more chunk to output if: bool has_next() const { + printf("compute has_next: %d, %d, %d\n", + (int)more_stripe_to_load(), + (int)more_stripe_to_decode(), + (int)more_table_chunk_to_output()); return more_stripe_to_load() || more_stripe_to_decode() || more_table_chunk_to_output(); } }; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a0abf546a22..ede4a4b107a 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -783,11 +783,16 @@ std::vector find_table_splits(table_view const& input, return cumulative_size{current_length, static_cast(size)}; }); - // TODO: remove: - segmented_sizes.device_to_host_sync(stream); - printf("total row sizes by segment = %d:\n", (int)segment_length); - for (auto& size : segmented_sizes) { - printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT); + { + int count{0}; + // TODO: remove: + segmented_sizes.device_to_host_sync(stream); + printf("total row sizes by segment = %d:\n", (int)segment_length); + for (auto& size : segmented_sizes) { + printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT); + if (count > 5) break; + ++count; + } } // TODO: exec_policy_nosync @@ -841,8 +846,14 @@ void reader::impl::decompress_and_decode() "TODO"); } } - rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); + rows_to_read = std::min(rows_to_read, _file_itm_data.rows_to_read) - rows_to_skip; + CUDF_EXPECTS(rows_to_read > 0, "Invalid rows_to_read computation."); + + // rows_to_read -= rows_to_skip; _file_itm_data.rows_to_skip = 0; + _file_itm_data.rows_to_read -= rows_to_read; + + printf("decode, skip = %ld, read = %ld\n", rows_to_skip, rows_to_read); CUDF_EXPECTS(rows_to_read <= static_cast(std::numeric_limits::max()), "Number or rows to decode exceeds the column size limit.", diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 4d4b029d211..95437b9e8ca 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1344,16 +1344,18 @@ TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) { + using data_type = int16_t; + using data_col = cudf::test::fixed_width_column_wrapper; + int64_t constexpr num_rows = 500'000'000l; int constexpr rows_per_stripe = 1'000'000; int constexpr num_reps = 10; int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - using data_col = cudf::test::fixed_width_column_wrapper; - - auto const it = - 
cudf::detail::make_counting_transform_iterator(0l, [](int64_t i) { return i % 123456789l; }); + auto const it = cudf::detail::make_counting_transform_iterator(0l, [](int64_t i) { + return i % static_cast(std::numeric_limits::max() / 2); + }); auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; @@ -1379,7 +1381,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); // Read with row selections and memory limit. - if (0) { + if (1) { int constexpr num_rows_to_read = 5'000'000; const auto num_rows_to_skip = metadata.num_rows() - num_rows_to_read - @@ -1397,9 +1399,9 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .num_rows(num_rows_to_read) .build(); auto reader = cudf::io::chunked_orc_reader( - 600'000UL * sizeof(int32_t) /* output limit, equal to 600k int32_t rows */, - 4'000'000UL /* input limit, around size of 1 stripe's decoded data */, - 500'000 /* output granularity, or minimum number of rows for the output chunk */, + 600'000UL * sizeof(data_type) /* output limit, equal to 600k rows */, + rows_per_stripe * sizeof(data_type) /* input limit, around size of 1 stripe's decoded data */, + rows_per_stripe / 2 /* output granularity, or minimum number of rows for the output chunk */, read_opts); auto num_chunks = 0; @@ -1414,11 +1416,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } while (reader.has_next()); auto const read_result = cudf::concatenate(tviews); - EXPECT_EQ(num_chunks, 10); + EXPECT_EQ(num_chunks, 11); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } - if (1) + if (0) // Read with only output limit. // There is no limit on the memory usage. // However, the reader should be able to detect and load only enough stripes each time From 6a6061ab74a88fba1f2fee6609ad16885490d79d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 13:26:37 -0800 Subject: [PATCH 181/321] Add test --- cpp/tests/io/orc_chunked_reader_test.cu | 66 ++++++++++++++++--------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 95437b9e8ca..43f0fb07a0c 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1353,10 +1353,10 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = cudf::detail::make_counting_transform_iterator(0l, [](int64_t i) { - return i % static_cast(std::numeric_limits::max() / 2); + auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) { + return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); }); - auto const col = data_col(it, it + num_rows); + auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; @@ -1372,7 +1372,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } } - printf("size: %zu\n", data_buffer.size()); + printf("buffer size: %zu\n", data_buffer.size()); // Verify metadata. auto const metadata = @@ -1381,14 +1381,14 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) EXPECT_EQ(metadata.num_stripes(), total_rows / rows_per_stripe); // Read with row selections and memory limit. 
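  // Rough numbers behind this test (derived from the constants above): 500M rows per
  // repetition x 10 repetitions = 5 billion rows, well past the ~2.147 billion size_type
  // limit, written as 5000 stripes of 1M rows each.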
- if (1) { - int constexpr num_rows_to_read = 5'000'000; - const auto num_rows_to_skip = - metadata.num_rows() - num_rows_to_read - - 123456 /*just shift the read data region back by a random offset*/; + { + int64_t constexpr num_rows_to_read = 5'000'000l; + int64_t const num_rows_to_skip = + static_cast(metadata.num_rows()) - num_rows_to_read - + 123456l /*just shift the read data region back by a random offset*/; // Check validity of the last 5 million rows. - const auto sequence_start = num_rows_to_skip % num_rows; + auto const sequence_start = num_rows_to_skip % num_rows; auto const skipped_col = data_col(it + sequence_start, it + sequence_start + num_rows_to_read); auto const expected = cudf::table_view{{skipped_col}}; @@ -1420,9 +1420,9 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } - if (0) - // Read with only output limit. - // There is no limit on the memory usage. +#define LOCAL_TEST +#ifdef LOCAL_TEST + // Read with only output limit -- there is no limit on the memory usage. // However, the reader should be able to detect and load only enough stripes each time // to avoid decoding a table having number of rows that exceeds the column size limit. { @@ -1431,24 +1431,44 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .use_index(false) .build(); auto reader = cudf::io::chunked_orc_reader( - 600'000UL * sizeof(int32_t) /* output limit, equal to 600k int64_t rows */, + static_cast(rows_per_stripe * 5.7) * + sizeof(data_type) /* output limit, equal to 5.2M rows */, 0UL /* no input limit */, - 500'000 /* output granularity, or minimum number of rows for the output chunk */, + rows_per_stripe / 2 /* output granularity, or minimum number of rows for the output chunk */, read_opts); - auto num_chunks = 0; - auto read_tables = std::vector>{}; - auto tviews = std::vector{}; + int num_chunks = 0; + int64_t num_read_rows = 0; + int64_t test_rows_start = 0; + auto test_chunk = std::unique_ptr{}; do { - auto chunk = reader.read_chunk(); + auto chunk = reader.read_chunk(); + auto const chunk_rows = chunk.tbl->num_rows(); + + // Just randomly select one output chunk to verify. + if (num_chunks == 123) { + test_rows_start = num_read_rows; + test_chunk = std::move(chunk.tbl); + } + ++num_chunks; - tviews.emplace_back(chunk.tbl->view()); - read_tables.emplace_back(std::move(chunk.tbl)); + num_read_rows += chunk_rows; } while (reader.has_next()); - EXPECT_EQ(num_chunks, 10); + EXPECT_EQ(num_read_rows, total_rows); + + // Typically, we got a chunk having 5M rows. + // However, since the reader internally splits file stripes that are not multiple of 5 stripes, + // we may have some extra chunks that have less than 5M rows. + EXPECT_EQ(num_chunks, 1002); - // Verify only the last chunk. + // Verify the selected chunk. 
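+    // Where 1002 presumably comes from: a ~5.7M-row output limit admits 5 full 1M-row
+    // stripes per decode, so 5000 stripes give ~1000 chunks, plus a couple of short
+    // remainder chunks where a load boundary is not a multiple of 5 stripes.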
+ using namespace cudf::test::iterators; + auto const skipped_col = + data_col(it + test_rows_start, it + test_rows_start + test_chunk->num_rows(), no_nulls()); + auto const expected = cudf::table_view{{skipped_col}}; + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, test_chunk->view()); } +#endif // LOCAL_TEST } From d8c7c449f5025a959372bde0dbab1f25f1429a90 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 14:09:58 -0800 Subject: [PATCH 182/321] Fix a bug --- cpp/src/io/orc/reader_impl_decode.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index ede4a4b107a..cab80235ea3 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -846,8 +846,8 @@ void reader::impl::decompress_and_decode() "TODO"); } } - rows_to_read = std::min(rows_to_read, _file_itm_data.rows_to_read) - rows_to_skip; - CUDF_EXPECTS(rows_to_read > 0, "Invalid rows_to_read computation."); + CUDF_EXPECTS(rows_to_read > rows_to_skip, "Invalid rows_to_read computation."); + rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read); // rows_to_read -= rows_to_skip; _file_itm_data.rows_to_skip = 0; From c33ebce5c7520c4a643bcbd02e0780c10f412247 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 14:56:50 -0800 Subject: [PATCH 183/321] Fix return order bug --- cpp/src/io/orc/reader_impl.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 0048a6fd1b9..51d14b739e3 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -159,12 +159,14 @@ table_with_metadata reader::impl::make_output_chunk() << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } + auto output = std::make_unique
<table>(out_tview, _stream, _mr);
+
   // If this is the last slice, we also delete the decoded_table to free up memory.
   if (!_chunk_read_data.more_table_chunk_to_output()) {
     _chunk_read_data.decoded_table.reset(nullptr);
   }
 
-  return std::make_unique<table>
(out_tview, _stream, _mr); + return output; }(); #endif From e7958cc078380b7350a9c6be717d7a0dc027ddf0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 14:58:15 -0800 Subject: [PATCH 184/321] Change local test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 43f0fb07a0c..cded7a300de 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1420,7 +1420,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } -#define LOCAL_TEST +// #define LOCAL_TEST #ifdef LOCAL_TEST // Read with only output limit -- there is no limit on the memory usage. // However, the reader should be able to detect and load only enough stripes each time @@ -1470,5 +1470,8 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) auto const expected = cudf::table_view{{skipped_col}}; CUDF_TEST_EXPECT_TABLES_EQUAL(expected, test_chunk->view()); } + + printf("done local test\n"); + fflush(stdout); #endif // LOCAL_TEST } From 52951875ba414e0ee45ff3233324158908a03e28 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 16:29:40 -0800 Subject: [PATCH 185/321] Add changes in `hostdevice_vector.hpp` ahead of time Signed-off-by: Nghia Truong --- cpp/src/io/utilities/hostdevice_vector.hpp | 24 ++++++++-------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index c3e2c4cc8e2..af1591b709a 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -26,13 +26,9 @@ #include #include -#include +#include #include -#include - -#include - namespace cudf::detail { /** @@ -64,7 +60,6 @@ class hostdevice_vector { h_data.reserve(max_size); h_data.resize(initial_size); - current_size = initial_size; d_data.resize(max_size, stream); } @@ -72,20 +67,14 @@ class hostdevice_vector { { CUDF_EXPECTS(size() < capacity(), "Cannot insert data into hostdevice_vector because capacity has been exceeded."); - h_data[current_size++] = data; + h_data.push_back(data); } [[nodiscard]] size_t capacity() const noexcept { return d_data.size(); } - [[nodiscard]] size_t size() const noexcept { return current_size; } + [[nodiscard]] size_t size() const noexcept { return h_data.size(); } [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); } [[nodiscard]] bool empty() const noexcept { return size() == 0; } - [[nodiscard]] T& front() { return h_data.front(); } - [[nodiscard]] T const& front() const { return front(); } - - [[nodiscard]] T& back() { return h_data.back(); } - [[nodiscard]] T const& back() const { return back(); } - [[nodiscard]] T& operator[](size_t i) { return h_data[i]; } [[nodiscard]] T const& operator[](size_t i) const { return h_data[i]; } @@ -98,6 +87,12 @@ class hostdevice_vector { [[nodiscard]] T* end() { return host_ptr(size()); } [[nodiscard]] T const* end() const { return host_ptr(size()); } + [[nodiscard]] T& front() { return h_data.front(); } + [[nodiscard]] T const& front() const { return front(); } + + [[nodiscard]] T& back() { return h_data.back(); } + [[nodiscard]] T const& back() const { return back(); } + [[nodiscard]] T* device_ptr(size_t offset = 0) { return d_data.data() + offset; } [[nodiscard]] T const* device_ptr(size_t 
offset = 0) const { return d_data.data() + offset; } @@ -181,7 +176,6 @@ class hostdevice_vector { private: cudf::detail::rmm_host_vector h_data; - size_t current_size = 0; rmm::device_uvector d_data; }; From fe2f55eea9bab2728eab1cb39664d5bbd4892198 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 19:41:54 -0800 Subject: [PATCH 186/321] Fix style Signed-off-by: Nghia Truong --- cpp/benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 516338febca..caf3b35b629 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -255,7 +255,7 @@ ConfigureNVBench( # ################################################################################################## # * orc reader benchmark -------------------------------------------------------------------------- -# TODO: add back the removed file, and add new file +# TODO: add back the removed file, and add new file ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp) # ################################################################################################## From 0ced9f43d38ae70df80cfe0c41724399d9f72f76 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 21:27:31 -0800 Subject: [PATCH 187/321] Fix doxygen Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 10 ++++++---- cpp/include/cudf/io/orc.hpp | 19 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index d532cee5677..8cc41bd5057 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -88,8 +88,9 @@ class chunked_reader : private reader { public: /** * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type, - * std::vector>&&, orc_reader_options const&, - * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * + * @param sources Input `datasource` objects to read the dataset from */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, @@ -100,8 +101,9 @@ class chunked_reader : private reader { rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, - * std::vector>&&, orc_reader_options const&, - * rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * + * @param sources Input `datasource` objects to read the dataset from */ explicit chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 8bf5baef97b..259c5c1016a 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -423,7 +423,8 @@ class chunked_orc_reader { chunked_orc_reader() = default; /** - * @brief Constructor from size limits and an array of data sources with reader options. + * @brief Construct the reader from input/output size limits, output row granularity, along with + * other ORC reader options. 
 *
 * The typical usage should be similar to this:
 * ```
@@ -459,7 +460,6 @@ class chunked_orc_reader {
   * or `0` if there is no limit
   * @param output_row_granularity The granularity parameter used for subdividing the decoded
   * table for final output
-   * @param sources Input `datasource` objects to read the dataset from
   * @param options Settings for controlling reading behaviors
   * @param stream CUDA stream used for device memory operations and kernel launches
   * @param mr Device memory resource to use for device memory allocation
@@ -473,7 +473,7 @@ class chunked_orc_reader {
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

  /**
-   * @brief Constructor from size limits and an array of data sources with reader options.
+   * @brief Construct the reader from input/output size limits along with other ORC reader options.
   *
   * This constructor implicitly calls the other constructor with `output_row_granularity` set to
   * 10'000 rows.
@@ -482,7 +482,6 @@ class chunked_orc_reader {
   * or `0` if there is no limit
   * @param data_read_limit Limit on temporary memory usage for reading the data sources,
   * or `0` if there is no limit
-   * @param sources Input `datasource` objects to read the dataset from
   * @param options Settings for controlling reading behaviors
   * @param stream CUDA stream used for device memory operations and kernel launches
   * @param mr Device memory resource to use for device memory allocation
@@ -495,14 +494,13 @@ class chunked_orc_reader {

  /**
-   * @brief Constructor from output size limit and an array of data sources with reader options.
+   * @brief Construct the reader from output size limits along with other ORC reader options.
   *
   * This constructor implicitly calls the other constructor with `data_read_limit` set to `0` and
   * `output_row_granularity` set to 10'000 rows.
   *
   * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
   * or `0` if there is no limit
-   * @param sources Input `datasource` objects to read the dataset from
   * @param options Settings for controlling reading behaviors
   * @param stream CUDA stream used for device memory operations and kernel launches
   * @param mr Device memory resource to use for device memory allocation
@@ -512,25 +510,26 @@ class chunked_orc_reader {
                      orc_reader_options const& options,
                      rmm::cuda_stream_view stream = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
  /**
   * @brief Destructor, destroying the internal reader instance.
   */
  ~chunked_orc_reader();

  /**
-   * @brief Check if there is any data in the given file has not yet read.
+   * @brief Check if there is any data in the given data sources that has not yet been read.
   *
   * @return A boolean value indicating if there is any data left to read
   */
  [[nodiscard]] bool has_next() const;

  /**
-   * @brief Read a chunk of rows in the given ORC file.
+   * @brief Read a chunk of rows in the given data sources.
   *
   * The sequence of returned tables, if concatenated by their order, guarantees to form a complete
-   * dataset as reading the entire given file at once.
+   * dataset, as if reading the entire given data sources at once.
   *
-   * An empty table will be returned if the given file is empty, or all the data in the file has
+   * An empty table will be returned if the given sources are empty, or all the data has
   * been read and returned by the previous calls.
* * @return An output `cudf::table` along with its metadata From 223f078e1507c1472ce64ec8ecb2057eb47d6d78 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 22:41:28 -0800 Subject: [PATCH 188/321] Rename struct Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 10 +++++----- cpp/src/io/orc/aggregate_orc_metadata.hpp | 2 +- cpp/src/io/orc/orc.hpp | 4 ++-- cpp/src/io/orc/reader_impl_chunking.hpp | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 15049b8b732..4f89142bdf9 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -152,7 +152,7 @@ aggregate_orc_metadata::aggregate_orc_metadata( } } -std::tuple> +std::tuple> aggregate_orc_metadata::select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, @@ -169,7 +169,7 @@ aggregate_orc_metadata::select_stripes( struct stripe_source_mapping { int source_idx; - std::vector stripe_info; + std::vector stripe_info; }; std::vector selected_stripes_mapping; @@ -181,7 +181,7 @@ aggregate_orc_metadata::select_stripes( // Each vector entry represents a source file; each nested vector represents the // user_defined_stripes to get from that source file for (size_t src_file_idx = 0; src_file_idx < user_specified_stripes.size(); ++src_file_idx) { - std::vector stripe_infos; + std::vector stripe_infos; // Coalesce stripe info at the source file later since that makes downstream processing much // easier in impl::read @@ -211,7 +211,7 @@ aggregate_orc_metadata::select_stripes( for (size_t src_file_idx = 0; src_file_idx < per_file_metadata.size() && count < rows_to_skip + rows_to_read; ++src_file_idx) { - std::vector stripe_infos; + std::vector stripe_infos; for (size_t stripe_idx = 0; stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size() && count < rows_to_skip + rows_to_read; @@ -239,7 +239,7 @@ aggregate_orc_metadata::select_stripes( rows_to_skip -= stripe_skip_rows; } - std::vector output; + std::vector output; // Read each stripe's stripefooter metadata for (auto& mapping : selected_stripes_mapping) { diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 613c08fb745..65d1f0a7ad4 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -113,7 +113,7 @@ class aggregate_orc_metadata { * * Stripes are potentially selected from multiple files. 
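 *
 * @return A tuple of the number of rows to skip, the number of rows to read, and the stripes
 * selected for reading (inferred from the signature below; stripe footers are read as part
 * of the selection)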
*/ - [[nodiscard]] std::tuple> select_stripes( + [[nodiscard]] std::tuple> select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, std::optional const& num_rows, diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 4a35aaf5107..fd55cbb6846 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -603,12 +603,12 @@ struct column_validity_info { */ class metadata { public: - struct OrcStripeInfo { + struct orc_stripe_info { StripeInformation const* stripe_info; StripeFooter const* stripe_footer; int source_idx; }; - std::vector stripe_info; + std::vector stripe_info; public: explicit metadata(datasource* const src, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 0aef5285ecf..95dd2fc13a2 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -124,7 +124,7 @@ struct range { struct file_intermediate_data { int64_t rows_to_skip; int64_t rows_to_read; - std::vector selected_stripes; + std::vector selected_stripes; // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } From 112131fcf60c34885446b750ac2cab6e34aa885c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Mar 2024 22:42:48 -0800 Subject: [PATCH 189/321] Change error message Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 5ffff3d7d40..af6a3a79817 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -57,7 +57,8 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, rmm::mr::device_memory_resource* mr) : reader() { - CUDF_EXPECTS(output_row_granularity > 0, "Invalid value of `output_row_granularity`."); + CUDF_EXPECTS(output_row_granularity > 0, + "The value of `output_row_granularity` must be positive."); _impl = std::make_unique(output_size_limit, data_read_limit, output_row_granularity, From be544f5bb89435b690b38b0262bf1aca381a493b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 09:32:18 -0800 Subject: [PATCH 190/321] Reverse changes in parquet code Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cpp | 16 ++++++++-------- cpp/src/io/parquet/reader_impl.hpp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index c930aa7f969..89562514564 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -416,8 +416,8 @@ void reader::impl::populate_metadata(table_metadata& out_metadata) table_with_metadata reader::impl::read_chunk_internal( bool uses_custom_row_bounds, std::optional> filter) { - // If `_out_metadata` has been constructed, just copy it over. - auto out_metadata = _out_metadata ? table_metadata{*_out_metadata} : table_metadata{}; + // If `_output_metadata` has been constructed, just copy it over. + auto out_metadata = _output_metadata ? table_metadata{*_output_metadata} : table_metadata{}; out_metadata.schema_info.resize(_output_buffers.size()); // output cudf columns as determined by the top level schema @@ -448,8 +448,8 @@ table_with_metadata reader::impl::read_chunk_internal( metadata = std::make_optional(); metadata->set_convert_binary_to_strings(false); } - // Only construct `out_metadata` if `_out_metadata` has not been cached. 
- if (!_out_metadata) { + // Only construct `out_metadata` if `_output_metadata` has not been cached. + if (!_output_metadata) { column_name_info& col_name = out_metadata.schema_info[i]; out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream)); } else { @@ -468,7 +468,7 @@ table_with_metadata reader::impl::finalize_output( { // Create empty columns as needed (this can happen if we've ended up with no actual data to read) for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) { - if (!_out_metadata) { + if (!_output_metadata) { column_name_info& col_name = out_metadata.schema_info[i]; out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr)); } else { @@ -476,10 +476,10 @@ table_with_metadata reader::impl::finalize_output( } } - if (!_out_metadata) { + if (!_output_metadata) { populate_metadata(out_metadata); - // Finally, save the output table metadata into `_out_metadata` for reuse next time. - _out_metadata = std::make_unique(out_metadata); + // Finally, save the output table metadata into `_output_metadata` for reuse next time. + _output_metadata = std::make_unique(out_metadata); } // advance output chunk/subpass/pass info diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index e6d63166029..185419a5b46 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -364,7 +364,7 @@ class reader::impl { std::vector _output_column_schemas; // _output_buffers associated metadata - std::unique_ptr _out_metadata; + std::unique_ptr _output_metadata; bool _strings_to_categorical = false; From ead3124ff54c3ac1c91893204c9bd06c05f1670b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 10:39:58 -0800 Subject: [PATCH 191/321] Fix option access Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_helpers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index ddf0f55b6af..6c3cba8059c 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -618,7 +618,9 @@ aggregate_reader_metadata::select_row_groups( auto [rows_to_skip, rows_to_read] = [&]() { if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( - skip_rows_opt, std::optional{num_rows_opt.value()}, get_num_rows()); + skip_rows_opt, + num_rows_opt.has_value() ? 
std::optional{num_rows_opt.value()} : std::nullopt, + get_num_rows()); return std::pair{static_cast(from_opts.first), static_cast(from_opts.second)}; }(); From 74d14d11ff48939583d13351c513d5fd49c596be Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 12:16:25 -0800 Subject: [PATCH 192/321] Remove outdated test Signed-off-by: Nghia Truong --- cpp/tests/io/row_selection_test.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cpp/tests/io/row_selection_test.cpp b/cpp/tests/io/row_selection_test.cpp index 0c259c81a23..ebadd870091 100644 --- a/cpp/tests/io/row_selection_test.cpp +++ b/cpp/tests/io/row_selection_test.cpp @@ -122,17 +122,4 @@ TEST_F(FromOptsTest, LimitOptionsToFileRows) } } -TEST_F(FromOptsTest, OverFlowDetection) -{ - auto const too_large_for_32bit = std::numeric_limits::max(); - - // Too many rows to read until the end of the file - EXPECT_THROW(skip_rows_num_rows_from_options(0, std::nullopt, too_large_for_32bit), - std::overflow_error); - - // Should work fine with num_rows - EXPECT_NO_THROW( - skip_rows_num_rows_from_options(1000, too_large_for_32bit - 100, too_large_for_32bit)); -} - CUDF_TEST_PROGRAM_MAIN() From 07103ad2a286bd47028a79168a52445e115502d9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 12:44:07 -0800 Subject: [PATCH 193/321] Wrap the debug print lines in `#ifdef/#endif` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 69 +++++++------- cpp/src/io/orc/reader_impl.hpp | 2 + cpp/src/io/orc/reader_impl_chunking.cu | 121 +++++++----------------- cpp/src/io/orc/reader_impl_chunking.hpp | 3 + cpp/src/io/orc/reader_impl_decode.cu | 85 +++++++++++++---- cpp/tests/io/orc_chunked_reader_test.cu | 5 - 6 files changed, 139 insertions(+), 146 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 51d14b739e3..1061a7ec64f 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -14,12 +14,9 @@ * limitations under the License. */ -// #define PRINT_DEBUG - // TODO: remove #include -#include // // // @@ -72,46 +69,55 @@ void reader::impl::prepare_data(int64_t skip_rows, // There are no columns in the table. if (_selected_columns.num_levels() == 0) { return; } +#ifdef LOCAL_TEST std::cout << "call global, skip = " << skip_rows << std::endl; +#endif global_preprocess(skip_rows, num_rows_opt, stripes, mode); if (!_chunk_read_data.more_table_chunk_to_output()) { if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) { +#ifdef LOCAL_TEST printf("load more data\n\n"); +#endif + load_data(); } if (_chunk_read_data.more_stripe_to_decode()) { +#ifdef LOCAL_TEST printf("decode more data\n\n"); +#endif + decompress_and_decode(); } } +#ifdef LOCAL_TEST printf("done load and decode data\n\n"); - - // decompress_and_decode(); - // while (_chunk_read_data.more_stripe_to_decode()) { - // decompress_and_decode(); - // _file_itm_data.out_buffers.push_back(std::move(_out_buffers)); - // } +#endif } table_with_metadata reader::impl::make_output_chunk() { +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << "start to make out, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // There is no columns in the table. if (_selected_columns.num_levels() == 0) { return {std::make_unique
<table>(), table_metadata{}}; }

   // If no rows or stripes to read, return empty columns
   if (!_chunk_read_data.more_table_chunk_to_output()) {
+#ifdef LOCAL_TEST
     printf("has no next\n");
+#endif
+
     std::vector<std::unique_ptr<column>> out_columns;
     auto out_metadata = get_meta_with_user_data();
     std::transform(_selected_columns.levels[0].begin(),
@@ -130,20 +136,23 @@ table_with_metadata reader::impl::make_output_chunk()
     return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)}; } -#if 1 auto out_table = [&] { if (_chunk_read_data.output_table_chunks.size() == 1) { _chunk_read_data.curr_output_table_chunk++; +#ifdef LOCAL_TEST printf("one chunk, no more table---------------------------------\n"); +#endif return std::move(_chunk_read_data.decoded_table); } +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << "prepare to make out, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif auto const out_chunk = _chunk_read_data.output_table_chunks[_chunk_read_data.curr_output_table_chunk++]; @@ -152,12 +161,15 @@ table_with_metadata reader::impl::make_output_chunk() {static_cast(out_chunk.start_idx), static_cast(out_chunk.start_idx + out_chunk.count)}, _stream)[0]; + +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << "done make out, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif auto output = std::make_unique
(out_tview, _stream, _mr); @@ -169,8 +181,7 @@ table_with_metadata reader::impl::make_output_chunk() return output; }(); -#endif - +#ifdef LOCAL_TEST if (!_chunk_read_data.has_next()) { static int count{0}; count++; @@ -184,6 +195,7 @@ table_with_metadata reader::impl::make_output_chunk() std::cout << "done, partial, peak_memory_usage: " << peak_mem << " , MB = " << (peak_mem * 1.0) / (1024.0 * 1024.0) << std::endl; } +#endif return {std::move(out_table), _out_metadata}; } @@ -267,13 +279,6 @@ reader::impl::impl(std::size_t output_size_limit, data_read_limit, output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} { - printf("construct reader , limit = %d, %d, gradunarity %d \n", - - (int)output_size_limit, - (int)data_read_limit, - (int)output_row_granularity - - ); } table_with_metadata reader::impl::read(int64_t skip_rows, @@ -286,16 +291,23 @@ table_with_metadata reader::impl::read(int64_t skip_rows, bool reader::impl::has_next() { +#ifdef LOCAL_TEST printf("==================query has next \n"); +#endif + prepare_data( _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); +#ifdef LOCAL_TEST printf("has next: %d\n", (int)_chunk_read_data.has_next()); +#endif + return _chunk_read_data.has_next(); } table_with_metadata reader::impl::read_chunk() { +#ifdef LOCAL_TEST printf("==================call read chunk\n"); { _stream.synchronize(); @@ -303,34 +315,19 @@ table_with_metadata reader::impl::read_chunk() std::cout << "\n\n\n------------start read chunk, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } - - { - static int count{0}; - ++count; - -#if 0 - if (count == 3) { - _file_itm_data.lvl_stripe_data.clear(); - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "clear all, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } - exit(0); - } #endif - } prepare_data( _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << "done prepare data, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif return make_output_chunk(); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 853055f50ed..84033ca0778 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -38,7 +38,9 @@ class memory_stats_logger { public: explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) : existing_mr(mr) { +#ifdef LOCAL_TEST printf("exist mr: %p\n", mr); +#endif statistics_mr = std::make_unique>( diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5157425ea44..c4e094f47dd 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -// #define PRINT_DEBUG - #include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/reader_impl.hpp" @@ -108,8 +106,12 @@ std::size_t gather_stream_info_and_column_desc( // Ignore reading this stream from source. // cudf::logger().warn("Unexpected stream in the input ORC source. The stream will be // ignored."); + +#ifdef LOCAL_TEST printf("Unexpected stream in the input ORC source. 
The stream will be ignored\n"); fflush(stdout); +#endif + src_offset += stream.length; continue; } @@ -271,65 +273,6 @@ template std::vector find_splits( host_span sizes, int64_t total_count, size_t size_limit); #endif -namespace { - -#ifdef PRINT_DEBUG -/** - * @brief Verify the splits, checking if they are correct. - * - * We need to verify that: - * 1. All chunk must have count > 0 - * 2. Chunks are continuous. - * 3. sum(all sizes in a chunk) < size_limit - * 4. sum(all counts in all chunks) == total_count. - */ -void verify_splits(host_span splits, - host_span sizes, - size_type total_count, - size_t size_limit) -{ - chunk last_split{0, 0}; - int64_t count{0}; - size_t cur_cumulative_size{0}; - for (auto const& split : splits) { - CUDF_EXPECTS(split.count > 0, "Invalid split count."); - CUDF_EXPECTS(last_split.start_idx + last_split.count == split.start_idx, - "Invalid split start_idx."); - count += split.count; - last_split = split; - - if (split.count > 1) { - // printf("split: %ld - %ld, size: %zu, limit: %zu\n", - // split.start_idx, - // split.count, - // sizes[split.start_idx + split.count - 1].size_bytes - cur_cumulative_size, - // size_limit); - // fflush(stdout); - CUDF_EXPECTS( - sizes[split.start_idx + split.count - 1].size_bytes - cur_cumulative_size <= size_limit, - "Chunk total size exceeds limit."); - if (split.start_idx + split.count < total_count) { - // printf("wrong split: %ld - %ld, size: %zu, limit: %zu\n", - // split.start_idx, - // split.count + 1, - // sizes[split.start_idx + split.count].size_bytes - cur_cumulative_size, - // size_limit); - - CUDF_EXPECTS( - sizes[split.start_idx + split.count].size_bytes - cur_cumulative_size > size_limit, - "Invalid split."); - } - } - cur_cumulative_size = sizes[split.start_idx + split.count - 1].size_bytes; - } - CUDF_EXPECTS(last_split.start_idx + last_split.count == sizes.back().count, - "Invalid split start_idx."); - CUDF_EXPECTS(count == total_count, "Invalid total count."); -} -#endif - -} // namespace - /** * @brief Find range of the data span by a given chunk of chunks. * @@ -375,10 +318,12 @@ void reader::impl::global_preprocess(int64_t skip_rows, "Number or rows to read exceeds the column size limit in READ_ALL mode.", std::overflow_error); +#ifdef LOCAL_TEST printf("input skip rows: %ld, num rows: %ld\n", skip_rows, num_rows_opt.value_or(-1l)); printf("actual skip rows: %ld, num rows: %ld\n", _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read); +#endif // auto const rows_to_skip = _file_itm_data.rows_to_skip; // auto const rows_to_read = _file_itm_data.rows_to_read; @@ -400,7 +345,9 @@ void reader::impl::global_preprocess(int64_t skip_rows, // Get the total number of stripes across all input files. auto const num_stripes = selected_stripes.size(); +#ifdef LOCAL_TEST printf("num load stripe: %d\n", (int)num_stripes); +#endif stripe_data_read_chunks.resize(num_stripes); lvl_stripe_stream_chunks.resize(_selected_columns.num_levels()); @@ -509,11 +456,15 @@ void reader::impl::global_preprocess(int64_t skip_rows, // Load all chunks if there is no read limit. 
if (_chunk_read_data.data_read_limit == 0) { +#ifdef LOCAL_TEST printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); +#endif + _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; return; } +#ifdef LOCAL_TEST printf("total stripe sizes:\n"); int count{0}; for (auto& size : total_stripe_sizes) { @@ -521,6 +472,7 @@ void reader::impl::global_preprocess(int64_t skip_rows, printf("size: %ld, %zu\n", size.count, size.size_bytes); if (count > 5) break; } +#endif // Compute the prefix sum of stripe data sizes. total_stripe_sizes.host_to_device_async(_stream); @@ -532,6 +484,7 @@ void reader::impl::global_preprocess(int64_t skip_rows, total_stripe_sizes.device_to_host_sync(_stream); +#ifdef LOCAL_TEST count = 0; printf("prefix sum total stripe sizes:\n"); for (auto& size : total_stripe_sizes) { @@ -539,6 +492,7 @@ void reader::impl::global_preprocess(int64_t skip_rows, printf("size: %ld, %zu\n", size.count, size.size_bytes); if (count > 5) break; } +#endif // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits. auto const load_limit = [&] { @@ -549,24 +503,13 @@ void reader::impl::global_preprocess(int64_t skip_rows, _chunk_read_data.load_stripe_chunks = find_splits(total_stripe_sizes, num_stripes, load_limit); -#ifndef PRINT_DEBUG +#ifdef LOCAL_TEST auto& splits = _chunk_read_data.load_stripe_chunks; printf("------------\nSplits (/total num stripe = %d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); } fflush(stdout); - - // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; - // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); - - // We need to verify that: - // 1. All chunk must have count > 0 - // 2. Chunks are continuous. - // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe - // 4. sum(number of stripes in all chunks) == total_num_stripes. - // TODO: enable only in debug. -// verify_splits(splits, total_stripe_sizes, num_stripes, _chunk_read_data.data_read_limit); #endif } @@ -587,7 +530,9 @@ void reader::impl::load_data() auto const stripe_start = stripe_chunk.start_idx; auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; +#ifdef LOCAL_TEST printf("\n\nloading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); +#endif // Prepare the buffer to read raw data onto. // TODO: clear all old buffer. @@ -687,7 +632,7 @@ void reader::impl::load_data() info.length)); stream_compinfo_map[stream_id_info{ info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] = &compinfo.back(); -#ifdef PRINT_DEBUG +#ifdef LOCAL_TEST printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.id.stripe_idx, (int)info.id.level, @@ -716,7 +661,8 @@ void reader::impl::load_data() stream_compinfo->max_uncompressed_size}; stripe_decomp_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes += stream_compinfo->max_uncompressed_size; -#ifdef PRINT_DEBUG + +#ifdef LOCAL_TEST printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", (int)stream_id.stripe_idx, (int)stream_id.level, @@ -733,8 +679,10 @@ void reader::impl::load_data() stream_compinfo_map.clear(); } else { +#ifdef LOCAL_TEST printf("no compression \n"); fflush(stdout); +#endif // Set decompression size equal to the input size. 
for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { @@ -762,11 +710,15 @@ void reader::impl::load_data() if (_chunk_read_data.data_read_limit == 0 && // TODO: rows_to_read is changed every decode, should we change this? _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { +#ifdef LOCAL_TEST printf("0 limit: output decode stripe chunk unchanged\n"); +#endif + _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; return; } +#ifdef LOCAL_TEST // TODO: remove if (_chunk_read_data.data_read_limit == 0) { printf("0 limit but size overflow\n"); } @@ -777,6 +729,7 @@ void reader::impl::load_data() if (count++ > 5) break; } } +#endif // Compute the prefix sum of stripe data sizes. stripe_decomp_sizes.host_to_device_async(_stream); @@ -788,6 +741,7 @@ void reader::impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); +#ifdef LOCAL_TEST { int count{0}; for (auto& size : stripe_decomp_sizes) { @@ -796,6 +750,7 @@ void reader::impl::load_data() if (count++ > 5) break; } } +#endif auto const decode_limit = [&] { // In this case, we have no read limit but have to split due to having large input in which @@ -814,33 +769,23 @@ void reader::impl::load_data() chunk.start_idx += stripe_chunk.start_idx; } -#ifndef PRINT_DEBUG +#ifdef LOCAL_TEST auto& splits = _chunk_read_data.decode_stripe_chunks; printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_chunk.count); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); } fflush(stdout); - - // std::cout << " total rows: " << _file_itm_data.rows_to_read << std::endl; - // print_cumulative_row_info(stripe_size_bytes, " ", _chunk_read_info.chunks); - - // We need to verify that: - // 1. All chunk must have count > 0 - // 2. Chunks are continuous. - // 3. sum(sizes of stripes in a chunk) < size_limit if chunk has more than 1 stripe - // 4. sum(number of stripes in all chunks) == total_num_stripes. - // TODO: enable only in debug. 
-// verify_splits(splits, stripe_decompression_sizes, stripe_chunk.count,
-// _file_itm_data.data_read_limit);
 #endif
 
   // lvl_stripe_data.clear();
   // _file_itm_data.compinfo_ready = true;
 
+#ifdef LOCAL_TEST
   auto peak_mem = mem_stats_logger.peak_memory_usage();
   std::cout << "load, peak_memory_usage: " << peak_mem << "("
             << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+#endif
 }
 
 }  // namespace cudf::io::orc::detail
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 95dd2fc13a2..7f2e0b15b8a 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -240,10 +240,13 @@ struct chunk_read_data {
   // Only has more chunk to output if:
   bool has_next() const
   {
+#ifdef LOCAL_TEST
     printf("compute has_next: %d, %d, %d\n",
            (int)more_stripe_to_load(),
            (int)more_stripe_to_decode(),
            (int)more_table_chunk_to_output());
+#endif
+
     return more_stripe_to_load() || more_stripe_to_decode() || more_table_chunk_to_output();
   }
 };
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index cab80235ea3..255fe8c0b0c 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -116,15 +116,15 @@ rmm::device_buffer decompress_stripe_data(
       continue;
     }
 
-#ifdef PRINT_DEBUG
-    printf("collect stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n",
-           (int)info.id.stripe_idx,
-           (int)info.id.level,
-           (int)info.id.orc_cold_idx,
-           (int)info.id.kind,
-           info.dst_pos,
-           info.length);
-    fflush(stdout);
+#ifdef LOCAL_TEST
+//    printf("collect stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n",
+//           (int)info.id.stripe_idx,
+//           (int)info.id.level,
+//           (int)info.id.orc_col_idx,
+//           (int)info.id.kind,
+//           info.dst_pos,
+//           info.length);
+//    fflush(stdout);
 #endif
 
     compinfo.push_back(gpu::CompressedStreamInfo(
@@ -485,7 +485,10 @@ void decode_stream_data(std::size_t num_dicts,
 {
   auto const num_stripes = chunks.size().first;
   auto const num_columns = chunks.size().second;
+
+#ifdef LOCAL_TEST
   printf("decode %d stripes \n", (int)num_stripes);
+#endif
 
   thrust::counting_iterator col_idx_it(0);
   thrust::counting_iterator stripe_idx_it(0);
@@ -507,7 +510,10 @@
     chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream);
 
   if (level > 0) {
+#ifdef LOCAL_TEST
     printf("update_null_mask\n");
+#endif
+
     // Update nullmasks for children if parent was a struct and had null mask
     update_null_mask(chunks, out_buffers, stream, mr);
   }
@@ -643,7 +649,6 @@ void aggregate_child_meta(std::size_t stripe_start,
 
   int index = 0;  // number of child column processed
 
-  printf("\n\n");
   // For each parent column, update its child column meta for each stripe.
   std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) {
     // printf("p_col.id: %d\n", (int)p_col.id);
@@ -749,7 +754,9 @@ std::vector find_table_splits(table_view const& input,
                                      std::size_t size_limit,
                                      rmm::cuda_stream_view stream)
 {
+#ifdef LOCAL_TEST
   printf("find table split, seg length = %d, limit = %d \n", segment_length, (int)size_limit);
+#endif
 
   // If segment_length is zero: we don't have any limit on granularity.
   // As such, set segment length to the number of rows.
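// For reference, a minimal host-side sketch of the greedy splitting strategy that `find_splits`
// and `find_table_splits` in these patches rely on. This is an illustration only, not the
// patched code: `find_splits_sketch` is a hypothetical name, `chunk` and `cumulative_size` are
// the structs defined in reader_impl_chunking.hpp, and `sizes` is assumed to already hold the
// inclusive prefix sums produced by the `inclusive_scan` calls above. Each output chunk greedily
// absorbs elements until one more would push its byte total past `size_limit`, and always keeps
// at least one element so that a single oversized element cannot stall the loop.
std::vector<chunk> find_splits_sketch(cudf::host_span<cumulative_size const> sizes,
                                      std::size_t size_limit)
{
  std::vector<chunk> splits;
  int64_t begin          = 0;  // index of the first element of the chunk being built
  std::size_t base_bytes = 0;  // cumulative bytes already consumed by previous chunks
  while (begin < static_cast<int64_t>(sizes.size())) {
    auto end = begin;
    // Grow the chunk while the next element's prefix sum still fits under the limit.
    while (end + 1 < static_cast<int64_t>(sizes.size()) &&
           sizes[end + 1].size_bytes - base_bytes <= size_limit) {
      ++end;
    }
    splits.push_back(chunk{begin, end - begin + 1});
    base_bytes = sizes[end].size_bytes;
    begin      = end + 1;
  }
  return splits;
}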
@@ -783,6 +790,7 @@ std::vector find_table_splits(table_view const& input, return cumulative_size{current_length, static_cast(size)}; }); +#ifdef LOCAL_TEST { int count{0}; // TODO: remove: @@ -794,6 +802,7 @@ std::vector find_table_splits(table_view const& input, ++count; } } +#endif // TODO: exec_policy_nosync thrust::inclusive_scan(rmm::exec_policy(stream), @@ -822,7 +831,9 @@ void reader::impl::decompress_and_decode() auto const load_stripe_start = _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1].start_idx; +#ifdef LOCAL_TEST printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); +#endif auto const rows_to_skip = _file_itm_data.rows_to_skip; // auto const rows_to_read = _file_itm_data.rows_to_read; @@ -853,7 +864,9 @@ void reader::impl::decompress_and_decode() _file_itm_data.rows_to_skip = 0; _file_itm_data.rows_to_read -= rows_to_read; +#ifdef LOCAL_TEST printf("decode, skip = %ld, read = %ld\n", rows_to_skip, rows_to_read); +#endif CUDF_EXPECTS(rows_to_read <= static_cast(std::numeric_limits::max()), "Number or rows to decode exceeds the column size limit.", @@ -956,6 +969,7 @@ void reader::impl::decompress_and_decode() auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { +#ifdef LOCAL_TEST printf("processing level = %d\n", (int)level); { @@ -964,6 +978,7 @@ void reader::impl::decompress_and_decode() std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); @@ -1006,12 +1021,14 @@ void reader::impl::decompress_and_decode() chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif const bool use_index = _config.use_index && @@ -1025,7 +1042,9 @@ void reader::impl::decompress_and_decode() // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); +#ifdef LOCAL_TEST printf(" use_index: %d\n", (int)use_index); +#endif // Logically view streams as columns auto const& stream_info = _file_itm_data.lvl_stream_info[level]; @@ -1051,9 +1070,10 @@ void reader::impl::decompress_and_decode() std::size_t stream_idx = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - // for (auto const& stripe : selected_stripes) { - +#ifdef LOCAL_TEST printf("processing stripe_idx = %d\n", (int)stripe_idx); +#endif + auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; @@ -1076,7 +1096,9 @@ void reader::impl::decompress_and_decode() &chunks); auto const is_stripe_data_empty = total_data_size == 0; +#ifdef LOCAL_TEST printf("is_stripe_data_empty: %d\n", (int)is_stripe_data_empty); +#endif CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); @@ -1089,7 +1111,9 @@ void reader::impl::decompress_and_decode() // fflush(stdout); auto const num_rows_per_stripe = 
static_cast(stripe_info->numberOfRows); +#ifdef LOCAL_TEST printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); +#endif auto const rowgroup_id = num_rowgroups; auto stripe_num_rowgroups = 0; @@ -1115,10 +1139,11 @@ void reader::impl::decompress_and_decode() ? static_cast(stripe_info->numberOfRows) : col_meta .num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_columns + col_idx]; - printf("col idx: %d, start_row: %d, num rows: %d\n", - (int)col_idx, - (int)chunk.start_row, - (int)chunk.num_rows); + + // printf("col idx: %d, start_row: %d, num rows: %d\n", + // (int)col_idx, + // (int)chunk.start_row, + // (int)chunk.num_rows); chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx]; chunk.parent_validity_info = @@ -1131,7 +1156,7 @@ void reader::impl::decompress_and_decode() chunk.type_kind = _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; - printf("type: %d\n", (int)chunk.type_kind); + // printf("type: %d\n", (int)chunk.type_kind); // num_child_rows for a struct column will be same, for other nested types it will be // calculated. @@ -1211,12 +1236,14 @@ void reader::impl::decompress_and_decode() // fflush(stdout); CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], @@ -1240,12 +1267,14 @@ void reader::impl::decompress_and_decode() stripe_data[i + stripe_start - load_stripe_start] = {}; } +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // printf("line %d\n", __LINE__); // fflush(stdout); @@ -1273,29 +1302,35 @@ void reader::impl::decompress_and_decode() // printf("line %d\n", __LINE__); // fflush(stdout); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // TODO: do not clear but reset each one. // and only reset if the new size/type are different. 
_out_buffers[level].clear(); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < num_stripes; ++j) { if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { +#ifdef LOCAL_TEST printf(" is nullable\n"); +#endif is_nullable = true; break; } @@ -1305,17 +1340,20 @@ void reader::impl::decompress_and_decode() // printf(" create col, num rows: %d\n", (int)n_rows); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // For list column, offset column will be always size + 1 if (is_list_type) n_rows++; _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); @@ -1323,17 +1361,20 @@ void reader::impl::decompress_and_decode() << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif } // printf("line %d\n", __LINE__); // fflush(stdout); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif decode_stream_data(num_dict_entries, rows_to_skip, @@ -1346,18 +1387,22 @@ void reader::impl::decompress_and_decode() _stream, _mr); +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // printf("line %d\n", __LINE__); // fflush(stdout); if (nested_cols.size()) { +#ifdef LOCAL_TEST printf("have nested col\n"); +#endif // Extract information to process nested child columns scan_null_counts(chunks, null_count_prefix_sums[level], _stream); @@ -1389,12 +1434,14 @@ void reader::impl::decompress_and_decode() // fflush(stdout); } // end loop level +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif std::vector> out_columns; _out_metadata = get_meta_with_user_data(); @@ -1427,12 +1474,14 @@ void reader::impl::decompress_and_decode() } } +#ifdef LOCAL_TEST { _stream.synchronize(); auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif // printf("col: \n"); // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view()); @@ -1449,6 +1498,7 @@ void reader::impl::decompress_and_decode() _chunk_read_data.output_size_limit, _stream); +#ifdef LOCAL_TEST auto& splits = _chunk_read_data.output_table_chunks; printf("------------\nSplits decoded table (/total num rows = %d): \n", (int)_chunk_read_data.decoded_table->num_rows()); @@ -1463,6 
+1513,7 @@ void reader::impl::decompress_and_decode() std::cout << "decomp and decode, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; } +#endif } } // namespace cudf::io::orc::detail diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index cded7a300de..862324e5aa8 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1036,8 +1036,6 @@ void input_limit_test_read(int test_location, for (size_t idx = 0; idx < test_files.size(); ++idx) { SCOPED_TRACE("Original line of failure: " + std::to_string(test_location) + ", file idx: " + std::to_string(idx)); - // TODO: remove - printf("file_idx %d\n", (int)idx); auto const [result, num_chunks] = chunked_read(test_files[idx], output_limit_bytes, input_limit_bytes); EXPECT_EQ(expected_chunk_counts[idx], num_chunks); @@ -1372,8 +1370,6 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) } } - printf("buffer size: %zu\n", data_buffer.size()); - // Verify metadata. auto const metadata = cudf::io::read_orc_metadata(cudf::io::source_info{data_buffer.data(), data_buffer.size()}); @@ -1420,7 +1416,6 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } -// #define LOCAL_TEST #ifdef LOCAL_TEST // Read with only output limit -- there is no limit on the memory usage. // However, the reader should be able to detect and load only enough stripes each time From 971296f982585d015a501a3d9445f76a2d027f5f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 13:18:36 -0800 Subject: [PATCH 194/321] Update benchmark Signed-off-by: Nghia Truong --- cpp/benchmarks/io/orc/orc_reader_input.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 8254bf65fe2..3d83568e128 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -119,8 +119,9 @@ using d_type_list = nvbench:: using io_list = nvbench::enum_type_list; -using compression_list = - nvbench::enum_type_list; +using compression_list = nvbench::enum_type_list; // NVBENCH_BENCH_TYPES(BM_orc_read_data, // NVBENCH_TYPE_AXES(d_type_list, From 10b7ca7ad359772e2be457f58ef5a58b76e187fb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 13:44:39 -0800 Subject: [PATCH 195/321] Revert changes in `orc_read_input.cpp` Signed-off-by: Nghia Truong --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/benchmarks/io/orc/orc_reader_input.cpp | 46 +++++++++++++--------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index caf3b35b629..ef25278877e 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -255,8 +255,7 @@ ConfigureNVBench( # ################################################################################################## # * orc reader benchmark -------------------------------------------------------------------------- -# TODO: add back the removed file, and add new file -ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp) +ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reader_options.cpp) # ################################################################################################## # * csv reader benchmark 
-------------------------------------------------------------------------- diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 3d83568e128..fdb7dbe59b8 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,6 +88,9 @@ void BM_orc_read_io_compression( nvbench::type_list, nvbench::enum_type>) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), static_cast(data_type::STRING), static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); @@ -113,24 +116,29 @@ void BM_orc_read_io_compression( orc_read_common(num_rows_written, source_sink, state); } -using d_type_list = nvbench:: - enum_type_list; - -using io_list = - nvbench::enum_type_list; - -using compression_list = nvbench::enum_type_list; - -// NVBENCH_BENCH_TYPES(BM_orc_read_data, -// NVBENCH_TYPE_AXES(d_type_list, -// nvbench::enum_type_list)) -// .set_name("orc_read_decode") -// .set_type_axes_names({"data_type", "io"}) -// .set_min_samples(4) -// .add_int64_axis("cardinality", {0, 1000}) -// .add_int64_axis("run_length", {1, 32}); +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_orc_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("orc_read_decode") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) .set_name("orc_read_io_compression") From 0b8a2b56610ce0df4bcb69a5c33bad49f0b4af85 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 14:08:51 -0800 Subject: [PATCH 196/321] Revert changes in `parquet/reader_impl_helpers.cpp` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_helpers.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 6c3cba8059c..3ecc5beb9d3 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -618,9 +618,7 @@ aggregate_reader_metadata::select_row_groups( auto [rows_to_skip, rows_to_read] = [&]() { if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( - skip_rows_opt, - num_rows_opt.has_value() ? 
std::optional<int64_t>{num_rows_opt.value()} : std::nullopt,
-    get_num_rows());
+    skip_rows_opt, num_rows_opt, get_num_rows());
   return std::pair{static_cast<int64_t>(from_opts.first), static_cast<size_type>(from_opts.second)};
 }();
 
From 589975120f2607b456dd2b2e8af38c7fe4a116df Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 8 Mar 2024 15:10:33 -0800
Subject: [PATCH 197/321] Implement chunked read benchmark

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 79 +++++++++++++++++-----
 1 file changed, 63 insertions(+), 16 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index fdb7dbe59b8..0503ede62ed 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -24,31 +24,70 @@

 #include <nvbench/nvbench.cuh>

+namespace {
+
 // Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
 // run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size = 512 << 20;
 constexpr cudf::size_type num_cols = 64;

+template <typename Timer>
+void read_once(cudf::io::orc_reader_options const& options,
+               cudf::size_type num_rows_to_read,
+               Timer& timer)
+{
+  timer.start();
+  auto const result = cudf::io::read_orc(options);
+  timer.stop();
+
+  CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns");
+  CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
+}
+
+template <typename Timer>
+void chunked_read(cudf::io::orc_reader_options const& options,
+                  cudf::size_type num_rows_to_read,
+                  cudf::size_type appox_num_chunks,
+                  Timer& timer)
+{
+  // Create a chunked reader that has internal memory limits set to process around 10 chunks. 
+ auto const output_limit = static_cast(data_size / appox_num_chunks); + auto const input_limit = output_limit * 10; + + auto reader = cudf::io::chunked_orc_reader(output_limit, input_limit, options); + cudf::size_type num_rows{0}; + + timer.start(); + do { + auto chunk = reader.read_chunk(); + num_rows += chunk.tbl->num_rows(); + } while (reader.has_next()); + timer.stop(); + + CUDF_EXPECTS(num_rows == num_rows_to_read, "Unexpected number of rows"); +} + +template void orc_read_common(cudf::size_type num_rows_to_read, cuio_source_sink_pair& source_sink, nvbench::state& state) { - cudf::io::orc_reader_options read_opts = - cudf::io::orc_reader_options::builder(source_sink.make_source_info()); + auto const read_opts = + cudf::io::orc_reader_options::builder(source_sink.make_source_info()).build(); + cudf::size_type constexpr approx_num_chunks = 10; auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.exec( - nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { - try_drop_l3_cache(); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch&, auto& timer) { + try_drop_l3_cache(); - timer.start(); - auto const result = cudf::io::read_orc(read_opts); - timer.stop(); - - CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns"); - CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); - }); + if constexpr (!is_chunked_read) { + read_once(read_opts, num_rows_to_read, timer); + } else { + chunked_read(read_opts, num_rows_to_read, approx_num_chunks, timer); + } + }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); @@ -57,6 +96,8 @@ void orc_read_common(cudf::size_type num_rows_to_read, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } +} // namespace + template void BM_orc_read_data(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -79,7 +120,7 @@ void BM_orc_read_data(nvbench::state& state, return view.num_rows(); }(); - orc_read_common(num_rows_written, source_sink, state); + orc_read_common(num_rows_written, source_sink, state); } template @@ -113,7 +154,12 @@ void BM_orc_read_io_compression( return view.num_rows(); }(); - orc_read_common(num_rows_written, source_sink, state); + auto const is_chunked_read = static_cast(state.get_int64("chunked_read")); + if (is_chunked_read) { + orc_read_common(num_rows_written, source_sink, state); + } else { + orc_read_common(num_rows_written, source_sink, state); + } } using d_type_list = nvbench::enum_type_list Date: Fri, 8 Mar 2024 16:54:47 -0800 Subject: [PATCH 198/321] Remove redundant parameters, and rewrite docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 3 +- cpp/src/io/functions.cpp | 2 +- cpp/src/io/orc/reader.cu | 5 +- cpp/src/io/orc/reader_impl.cu | 21 +++----- cpp/src/io/orc/reader_impl.hpp | 72 ++++++++++---------------- cpp/src/io/orc/reader_impl_chunking.cu | 8 ++- 6 files changed, 40 insertions(+), 71 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 8cc41bd5057..c07dbef11d7 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -72,10 +72,9 @@ class reader { /** * @brief Reads the entire dataset. 
* - * @param options Settings for controlling reading behavior * @return The set of columns along with table metadata */ - table_with_metadata read(orc_reader_options const& options); + table_with_metadata read(); }; /** diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index e8dbf97abd6..6a08e41d161 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -419,7 +419,7 @@ table_with_metadata read_orc(orc_reader_options const& options, auto datasources = make_datasources(options.get_source()); auto reader = std::make_unique(std::move(datasources), options, stream, mr); - return reader->read(options); + return reader->read(); } /** diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index af6a3a79817..ea0b43c0f93 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -31,10 +31,7 @@ reader::reader(std::vector>&& sources, { } -table_with_metadata reader::read(orc_reader_options const& options) -{ - return _impl->read(options.get_skip_rows(), options.get_num_rows(), options.get_stripes()); -} +table_with_metadata reader::read() { return _impl->read(); } chunked_reader::chunked_reader(std::size_t output_size_limit, std::size_t data_read_limit, diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 1061a7ec64f..1ed3e1347c0 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -56,14 +56,11 @@ namespace cudf::io::orc::detail { -void reader::impl::prepare_data(int64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes, - read_mode mode) +void reader::impl::prepare_data(read_mode mode) { // Selected columns at different levels of nesting are stored in different elements // of `selected_columns`; thus, size == 1 means no nested columns - CUDF_EXPECTS(skip_rows == 0 or _selected_columns.num_levels() == 1, + CUDF_EXPECTS(_config.skip_rows == 0 or _selected_columns.num_levels() == 1, "skip_rows is not supported by nested columns"); // There are no columns in the table. 
@@ -73,7 +70,7 @@ void reader::impl::prepare_data(int64_t skip_rows, std::cout << "call global, skip = " << skip_rows << std::endl; #endif - global_preprocess(skip_rows, num_rows_opt, stripes, mode); + global_preprocess(mode); if (!_chunk_read_data.more_table_chunk_to_output()) { if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) { @@ -281,11 +278,9 @@ reader::impl::impl(std::size_t output_size_limit, { } -table_with_metadata reader::impl::read(int64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes) +table_with_metadata reader::impl::read() { - prepare_data(skip_rows, num_rows_opt, stripes, read_mode::READ_ALL); + prepare_data(read_mode::READ_ALL); return make_output_chunk(); } @@ -295,8 +290,7 @@ bool reader::impl::has_next() printf("==================query has next \n"); #endif - prepare_data( - _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); + prepare_data(read_mode::CHUNKED_READ); #ifdef LOCAL_TEST printf("has next: %d\n", (int)_chunk_read_data.has_next()); @@ -317,8 +311,7 @@ table_with_metadata reader::impl::read_chunk() } #endif - prepare_data( - _config.skip_rows, _config.num_read_rows, _config.selected_stripes, read_mode::CHUNKED_READ); + prepare_data(read_mode::CHUNKED_READ); #ifdef LOCAL_TEST { diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 84033ca0778..b609c04affd 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -102,16 +102,9 @@ class reader::impl { rmm::mr::device_memory_resource* mr); /** - * @brief Read an entire set or a subset of data and returns a set of columns - * - * @param skip_rows Number of rows to skip from the start - * @param num_rows_opt Optional number of rows to read, or `std::nullopt` to read all rows - * @param stripes Indices of individual stripes to load if non-empty - * @return The set of columns along with metadata + * @copydoc cudf::io::orc::detail::reader::read */ - table_with_metadata read(int64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes); + table_with_metadata read(); /** * @copydoc cudf::io::chunked_orc_reader::has_next @@ -124,70 +117,59 @@ class reader::impl { table_with_metadata read_chunk(); private: - // TODO + /** + * @brief The enum indicating whether the data sources are read all at once or chunk by chunk. + */ enum class read_mode { READ_ALL, CHUNKED_READ }; /** * @brief Perform all the necessary data preprocessing before creating an output table. * * This is the proxy to call all other data preprocessing functions, which are prerequisite - * for generating an output table. + * for generating the output. * - * @param skip_rows Number of rows to skip from the start - * @param num_rows_opt Optional number of rows to read, or `std::nullopt` to read all rows - * @param stripes Indices of individual stripes to load if non-empty + * @param mode Value indicating if the data sources are read all at once or chunk by chunk */ - void prepare_data(int64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes, - read_mode mode); + void prepare_data(read_mode mode); /** * @brief Perform a global preprocessing step that executes exactly once for the entire duration * of the reader. * - * TODO: rewrite, not use "ensure". + * In this step, the metadata of all stripes in the data sources is parsed, and information about + * data streams of the selected columns in all stripes are generated. 
If the reader has a data
+   * read limit, sizes of these streams are used to split the list of all stripes into multiple
+   * subsets, each of which will be read into memory in the `load_data()` step. These subsets are
+   * computed such that memory usage will be capped around a fixed size limit.
    *
-   * In this step, the metadata of all stripes in the data source is parsed, and information about
-   * data streams for all selected columns in alls tripes are generated. If the reader has a data
-   * read limit, data size of all stripes are used to determine the chunks of consecutive
-   * stripes for reading each time using the `load_data()` step. This is to ensure that loading
-   * these stripes will not exceed a fixed portion the data read limit.
+   * @param mode Value indicating if the data sources are read all at once or chunk by chunk
    */
-  void global_preprocess(int64_t skip_rows,
-                         std::optional<int64_t> const& num_rows_opt,
-                         std::vector<std::vector<size_type>> const& stripes,
-                         read_mode mode);
+  void global_preprocess(read_mode mode);
 
   /**
-   * @brief Load stripes from the input source and store the data in the internal buffers.
+   * @brief Load stripes from the input data sources into memory.
    *
-   * If there is a data read limit, only a chunk of stripes are read at a time such that
-   * their total data size does not exceed a fixed portion of the limit. Then, the data is
-   * probed to determine the uncompressed sizes for these loaded stripes, which are in turn
-   * used to determine a subset of stripes to decompress and decode in the next step
-   * `decompress_and_decode()`.
-   * This is to ensure that loading data together with decompression and decoding will not exceed
-   * the data read limit.
+   * If there is a data read limit, only a subset of stripes is read at a time such that
+   * their total data size does not exceed a fixed size limit. Then, the data is probed to
+   * estimate its uncompressed sizes, which are in turn used to split that stripe subset into
+   * smaller subsets, each of which is decompressed and decoded in the next step
+   * `decompress_and_decode()`. This ensures that loading from the data sources together with
+   * decompression and decoding is capped around the given data read limit.
    */
   void load_data();
 
   /**
-   * @brief Decompress and decode the data in the internal buffers, and store the result into
-   * an internal table.
+   * @brief Decompress and decode stripe data in the internal buffers, and store the result into
+   * an intermediate table.
    *
-   * If there is a data read limit, only a chunk of stripes are decompressed and decoded at a time.
-   * Then, the result is stored in an internal table, and sizes of its rows are computed
-   * to determine slices of rows to return as the output table in the final step
-   * `make_output_chunk`.
+   * This function expects that the other preprocessing steps (`global_preprocess()` and
+   * `load_data()`) have already been done.
    */
   void decompress_and_decode();
 
   /**
    * @brief Create the output table from the intermediate table and return it along with metadata.
    *
-   * This function is called internally and expects all preprocessing steps have already been done.
-   *
    * @return The output table along with columns' metadata
    */
   table_with_metadata make_output_chunk();
@@ -204,7 +186,7 @@ class reader::impl {
 
   memory_stats_logger mem_stats_logger;
 
-  // Reader configs
+  // Reader configs. 
struct { data_type timestamp_type; // override output timestamp resolution bool use_index; // enable or disable attempt to use row index for parsing diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index c4e094f47dd..09adefad6d9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -298,10 +298,7 @@ std::pair get_range(std::vector const& input_chunks, return {begin, end}; } -void reader::impl::global_preprocess(int64_t skip_rows, - std::optional const& num_rows_opt, - std::vector> const& stripes, - read_mode mode) +void reader::impl::global_preprocess(read_mode mode) { if (_file_itm_data.global_preprocessed) { return; } _file_itm_data.global_preprocessed = true; @@ -309,7 +306,8 @@ void reader::impl::global_preprocess(int64_t skip_rows, // Load stripes's metadata. std::tie( _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) = - _metadata.select_stripes(stripes, skip_rows, num_rows_opt, _stream); + _metadata.select_stripes( + _config.selected_stripes, _config.skip_rows, _config.num_read_rows, _stream); if (_file_itm_data.has_no_data()) { return; } CUDF_EXPECTS( From dff02358b9a91b22e9138fedb73535b3b35d9c78 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 17:57:33 -0800 Subject: [PATCH 199/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 40 ++++---------------------- cpp/src/io/orc/reader_impl.hpp | 4 ++- cpp/src/io/orc/reader_impl_chunking.cu | 12 +++++--- 3 files changed, 16 insertions(+), 40 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 1ed3e1347c0..51bbd47d690 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -58,16 +58,11 @@ namespace cudf::io::orc::detail { void reader::impl::prepare_data(read_mode mode) { - // Selected columns at different levels of nesting are stored in different elements - // of `selected_columns`; thus, size == 1 means no nested columns - CUDF_EXPECTS(_config.skip_rows == 0 or _selected_columns.num_levels() == 1, - "skip_rows is not supported by nested columns"); - // There are no columns in the table. if (_selected_columns.num_levels() == 0) { return; } #ifdef LOCAL_TEST - std::cout << "call global, skip = " << skip_rows << std::endl; + std::cout << "call global, skip = " << _config.skip_rows << std::endl; #endif global_preprocess(mode); @@ -276,6 +271,10 @@ reader::impl::impl(std::size_t output_size_limit, data_read_limit, output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} { + // Selected columns at different levels of nesting are stored in different elements + // of `selected_columns`; thus, size == 1 means no nested columns. 
+ CUDF_EXPECTS(_config.skip_rows == 0 or _selected_columns.num_levels() == 1, + "skip_rows is not supported if having nested columns"); } table_with_metadata reader::impl::read() @@ -286,42 +285,13 @@ table_with_metadata reader::impl::read() bool reader::impl::has_next() { -#ifdef LOCAL_TEST - printf("==================query has next \n"); -#endif - prepare_data(read_mode::CHUNKED_READ); - -#ifdef LOCAL_TEST - printf("has next: %d\n", (int)_chunk_read_data.has_next()); -#endif - return _chunk_read_data.has_next(); } table_with_metadata reader::impl::read_chunk() { -#ifdef LOCAL_TEST - printf("==================call read chunk\n"); - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "\n\n\n------------start read chunk, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - prepare_data(read_mode::CHUNKED_READ); - -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << "done prepare data, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - return make_output_chunk(); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b609c04affd..ae518bc2a5f 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -199,13 +199,15 @@ class reader::impl { std::vector> const selected_stripes; } const _config; - // Intermediate data for internal processing. + // Intermediate data for reading. std::unique_ptr const _col_meta; // Track of orc mapping and child details std::vector> const _sources; // Unused but owns data for `_metadata` aggregate_orc_metadata _metadata; column_hierarchy const _selected_columns; // Construct from `_metadata` thus declare after it file_intermediate_data _file_itm_data; chunk_read_data _chunk_read_data; + + // Intermediate data for output. 
std::unique_ptr _meta_with_user_data; table_metadata _out_metadata; std::vector> _out_buffers; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 09adefad6d9..a719ec73c91 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -317,10 +317,14 @@ void reader::impl::global_preprocess(read_mode mode) std::overflow_error); #ifdef LOCAL_TEST - printf("input skip rows: %ld, num rows: %ld\n", skip_rows, num_rows_opt.value_or(-1l)); - printf("actual skip rows: %ld, num rows: %ld\n", - _file_itm_data.rows_to_skip, - _file_itm_data.rows_to_read); + { + auto const skip_rows = _config.skip_rows; + auto const num_rows_opt = _config.num_read_rows; + printf("input skip rows: %ld, num rows: %ld\n", skip_rows, num_rows_opt.value_or(-1l)); + printf("actual skip rows: %ld, num rows: %ld\n", + _file_itm_data.rows_to_skip, + _file_itm_data.rows_to_read); + } #endif // auto const rows_to_skip = _file_itm_data.rows_to_skip; From 28e631f56284bedb5e978846e2260934314629a5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 18:22:02 -0800 Subject: [PATCH 200/321] Rename variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 21 ++++++++++----------- cpp/src/io/orc/reader_impl_chunking.hpp | 4 ++-- cpp/src/io/orc/reader_impl_decode.cu | 5 ++--- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index a719ec73c91..c00e7085bc3 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -68,7 +68,7 @@ namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( - int64_t stripe_index, + int64_t stripe_processing_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -77,7 +77,7 @@ std::size_t gather_stream_info_and_column_desc( bool use_index, bool apply_struct_map, int64_t* num_dictionary_entries, - std::size_t* stream_idx, + std::size_t* stream_processing_order, std::optional*> const& stream_info, std::optional*> const& chunks) { @@ -104,8 +104,7 @@ std::size_t gather_stream_info_and_column_desc( for (auto const& stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { // Ignore reading this stream from source. - // cudf::logger().warn("Unexpected stream in the input ORC source. The stream will be - // ignored."); + CUDF_LOG_WARN("Unexpected stream in the input ORC source. The stream will be ignored."); #ifdef LOCAL_TEST printf("Unexpected stream in the input ORC source. The stream will be ignored\n"); @@ -126,14 +125,13 @@ std::size_t gather_stream_info_and_column_desc( auto const schema_type = types[column_id]; if (!schema_type.subtypes.empty() && schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - // printf("present stream\n"); for (auto const& idx : schema_type.subtypes) { auto const child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; if (child_idx >= 0) { col = child_idx; if (chunks.has_value()) { - auto& chunk = (*chunks.value())[stripe_index][col]; - chunk.strm_id[gpu::CI_PRESENT] = *stream_idx; + auto& chunk = (*chunks.value())[stripe_processing_order][col]; + chunk.strm_id[gpu::CI_PRESENT] = *stream_processing_order; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } } @@ -144,7 +142,7 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[stripe_index][col]; + auto& chunk = (*chunks.value())[stripe_processing_order][col]; // printf("use stream id: %d, stripe: %d, level: %d, col idx: %d, kind: %d\n", // (int)(*stream_idx), // (int)stripe_index, @@ -152,7 +150,7 @@ std::size_t gather_stream_info_and_column_desc( // (int)column_id, // (int)stream.kind); - chunk.strm_id[index_type] = *stream_idx; + chunk.strm_id[index_type] = *stream_processing_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams chunk.skip_count |= 1 << index_type; @@ -165,7 +163,7 @@ std::size_t gather_stream_info_and_column_desc( } } - (*stream_idx)++; + (*stream_processing_order)++; } else { // not chunks.has_value() // printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", // (int)stripe_index, @@ -177,7 +175,8 @@ std::size_t gather_stream_info_and_column_desc( stripeinfo->offset + src_offset, dst_offset, stream.length, - stream_id_info{static_cast(stripe_index), level, column_id, stream.kind}); + stream_id_info{ + static_cast(stripe_processing_order), level, column_id, stream.kind}); } dst_offset += stream.length; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 7f2e0b15b8a..8a09090cc78 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -298,7 +298,7 @@ std::pair get_range(std::vector const& input_chunks, * data, but not both. */ std::size_t gather_stream_info_and_column_desc( - int64_t stripe_index, + int64_t stripe_processing_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -307,7 +307,7 @@ std::size_t gather_stream_info_and_column_desc( bool use_index, bool apply_struct_map, int64_t* num_dictionary_entries, - std::size_t* stream_idx, + std::size_t* stream_processing_order, std::optional*> const& stream_info, std::optional*> const& chunks); diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 255fe8c0b0c..e7aa9709608 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -1066,8 +1066,7 @@ void reader::impl::decompress_and_decode() int64_t num_rowgroups = 0; // TODO: Stripe and stream idx must be by chunk. 
- // std::size_t stripe_idx = 0; - std::size_t stream_idx = 0; + std::size_t stream_processing_order = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { #ifdef LOCAL_TEST @@ -1091,7 +1090,7 @@ void reader::impl::decompress_and_decode() use_index, level == 0, &num_dict_entries, - &stream_idx, + &stream_processing_order, std::nullopt, // stream_info &chunks); From d8163db9acbc864bc059cb81022a4ca5939d6d47 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 20:42:36 -0800 Subject: [PATCH 201/321] Rename variable Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 20 ++++++++------- cpp/src/io/orc/reader_impl_chunking.hpp | 16 ++++++------ cpp/src/io/orc/reader_impl_decode.cu | 34 ++++++++++++------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index c00e7085bc3..2a6af96983a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -175,7 +175,7 @@ std::size_t gather_stream_info_and_column_desc( stripeinfo->offset + src_offset, dst_offset, stream.length, - stream_id_info{ + stream_source_info{ static_cast(stripe_processing_order), level, column_id, stream.kind}); } @@ -628,17 +628,18 @@ void reader::impl::load_data() for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.id.stripe_idx - stripe_start].data()) + + static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()) + info.dst_pos, info.length)); - stream_compinfo_map[stream_id_info{ - info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}] = &compinfo.back(); + stream_compinfo_map[stream_source_info{ + info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = + &compinfo.back(); #ifdef LOCAL_TEST printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_col_idx, - (int)info.id.kind, + (int)info.source.stripe_idx, + (int)info.source.level, + (int)info.source.orc_col_idx, + (int)info.source.kind, info.dst_pos, info.length); fflush(stdout); @@ -688,7 +689,8 @@ void reader::impl::load_data() // Set decompression size equal to the input size. 
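// [Editor's note -- annotation, not part of the patch] This "decompression
// size equal to the input size" path appears to be the one taken when the
// source stripes carry no compression: each stream then occupies exactly its
// on-disk length, so the loop below reduces the per-stripe decode-size
// estimate to a plain sum of raw stream lengths.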
for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - stripe_decomp_sizes[info.id.stripe_idx - stripe_chunk.start_idx].size_bytes += info.length; + stripe_decomp_sizes[info.source.stripe_idx - stripe_chunk.start_idx].size_bytes += + info.length; } } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 8a09090cc78..2543929bd72 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -32,15 +32,15 @@ namespace cudf::io::orc::detail { /** * @brief Struct that store identification of an ORC streams */ -struct stream_id_info { - uint32_t stripe_idx; // global stripe id throughout the data source +struct stream_source_info { + uint32_t stripe_idx; // global stripe id throughout all data sources // TODO: change type below std::size_t level; // level of the nested column uint32_t orc_col_idx; // orc column id StreamKind kind; // stream kind struct hash { - std::size_t operator()(stream_id_info const& id) const + std::size_t operator()(stream_source_info const& id) const { auto const hasher = std::hash{}; return hasher(id.stripe_idx) ^ hasher(id.level) ^ @@ -49,7 +49,7 @@ struct stream_id_info { } }; struct equal_to { - bool operator()(stream_id_info const& lhs, stream_id_info const& rhs) const + bool operator()(stream_source_info const& lhs, stream_source_info const& rhs) const { return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; @@ -62,7 +62,7 @@ struct stream_id_info { */ template using stream_id_map = - std::unordered_map; + std::unordered_map; /** * @brief Struct that store identification of an ORC stream. @@ -72,8 +72,8 @@ struct orc_stream_info { explicit orc_stream_info(uint64_t offset_, std::size_t dst_pos_, uint32_t length_, - stream_id_info const& id_) - : offset(offset_), dst_pos(dst_pos_), length(length_), id(id_) + stream_source_info const& source_) + : offset(offset_), dst_pos(dst_pos_), length(length_), source(source_) { #ifdef PRINT_DEBUG printf(" construct stripe id [%d, %d, %d, %d]\n", @@ -89,7 +89,7 @@ struct orc_stream_info { std::size_t length; // stream length to read // Store location of the stream in the stripe, so we can look up where this stream comes from. 
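// [Editor's note -- annotation, not part of the patch] This member is the key
// that ties the two passes together: load_data() stores the parsed compression
// info into compinfo_map under this {stripe_idx, level, orc_col_idx, kind}
// source, and decompress_stripe_data() later calls compinfo_map.at(...) with
// the same key to retrieve it without re-parsing the compressed blocks.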
- stream_id_info id; + stream_source_info source; }; /** diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index e7aa9709608..13b928c23c4 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -101,8 +101,8 @@ rmm::device_buffer decompress_stripe_data( // TODO: use lvl_stripe_stream_chunks std::size_t count{0}; for (auto const& info : stream_info) { - if (info.id.stripe_idx < stripe_chunk.start_idx || - info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + if (info.source.stripe_idx < stripe_chunk.start_idx || + info.source.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { continue; } count++; @@ -111,17 +111,17 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo(0, count, stream); for (auto const& info : stream_info) { - if (info.id.stripe_idx < stripe_chunk.start_idx || - info.id.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + if (info.source.stripe_idx < stripe_chunk.start_idx || + info.source.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { continue; } #ifdef LOCAL_TEST // printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", -// (int)info.id.stripe_idx, -// (int)info.id.level, -// (int)info.id.orc_col_idx, -// (int)info.id.kind, +// (int)info.source.stripe_idx, +// (int)info.source.level, +// (int)info.source.orc_col_idx, +// (int)info.source.kind, // info.dst_pos, // info.length); // fflush(stdout); @@ -129,19 +129,19 @@ rmm::device_buffer decompress_stripe_data( compinfo.push_back(gpu::CompressedStreamInfo( static_cast( - stripe_data[info.id.stripe_idx - load_stripe_chunk.start_idx].data()) + + stripe_data[info.source.stripe_idx - load_stripe_chunk.start_idx].data()) + info.dst_pos, info.length)); // printf("line %d\n", __LINE__); // fflush(stdout); - auto const& cached_comp_info = compinfo_map.at( - stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_col_idx, info.id.kind}); + auto const& cached_comp_info = compinfo_map.at(stream_source_info{ + info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}); // printf("line %d\n", __LINE__); // fflush(stdout); // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.id.stripe_idx, info.id.level, info.id.orc_cold_idx, - // info.id.kind}]; + // compinfo_map[stream_id_info{info.source.stripe_idx, info.source.level, + // info.source.orc_cold_idx, info.source.kind}]; auto& stream_comp_info = compinfo.back(); stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; @@ -171,10 +171,10 @@ rmm::device_buffer decompress_stripe_data( auto const& info = stream_info[i]; printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)info.id.stripe_idx, - (int)info.id.level, - (int)info.id.orc_cold_idx, - (int)info.id.kind, + (int)info.source.stripe_idx, + (int)info.source.level, + (int)info.source.orc_cold_idx, + (int)info.source.kind, (size_t)compinfo[i].num_compressed_blocks, (size_t)compinfo[i].num_uncompressed_blocks, compinfo[i].max_uncompressed_size); From 3f57b5ff70cd6af2df27f3d2234ebd9871e0cb28 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 20:45:47 -0800 Subject: [PATCH 202/321] Change variable name Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- cpp/src/io/orc/reader_impl_chunking.hpp | 12 ++---------- cpp/src/io/orc/reader_impl_decode.cu | 2 
+- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2a6af96983a..2bacbf6e72e 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -587,7 +587,7 @@ void reader::impl::load_data() // TODO: This is subpass // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. - stream_id_map stream_compinfo_map; + stream_source_map stream_compinfo_map; cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, _stream); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2543929bd72..371509c5eb4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -61,27 +61,19 @@ struct stream_source_info { * @brief Map to lookup a value from stream id. */ template -using stream_id_map = +using stream_source_map = std::unordered_map; /** * @brief Struct that store identification of an ORC stream. */ struct orc_stream_info { - // TODO: remove constructor explicit orc_stream_info(uint64_t offset_, std::size_t dst_pos_, uint32_t length_, stream_source_info const& source_) : offset(offset_), dst_pos(dst_pos_), length(length_), source(source_) { -#ifdef PRINT_DEBUG - printf(" construct stripe id [%d, %d, %d, %d]\n", - (int)stripe_idx, - (int)level, - (int)orc_col_idx, - (int)kind); -#endif } // Data info: uint64_t offset; // offset in data source @@ -133,7 +125,7 @@ struct file_intermediate_data { std::size_t num_stripes() const { return selected_stripes.size(); } // Store the compression information for each data stream. - stream_id_map compinfo_map; + stream_source_map compinfo_map; // The buffers to store raw data read from disk, initialized for each reading stripe chunks. // After decoding, such buffers can be released. 
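[Editor's note] As a quick illustration of how the renamed stream_source_map is
meant to be used -- a minimal sketch with made-up key values, assuming the
stream_source_info, stripe_level_comp_info, and stream_source_map definitions
from reader_impl_chunking.hpp above, and that orc::DATA is a valid StreamKind:

  // Cache of compression info, keyed by where a stream comes from.
  stream_source_map<stripe_level_comp_info> compinfo_map;

  // Key: stripe 0, nesting level 0, ORC column 3, DATA stream (values made up).
  auto const key = stream_source_info{0, 0, 3, orc::DATA};

  // Loading pass: record block counts and the decompressed-size estimate.
  compinfo_map[key] = stripe_level_comp_info{/*num_compressed_blocks=*/8,
                                             /*num_uncompressed_blocks=*/0,
                                             /*total_decomp_size=*/1 << 20};

  // Decoding pass: the same source key retrieves the cached info, so the
  // compressed stream headers need not be parsed a second time.
  auto const& cached = compinfo_map.at(key);

The XOR-of-hashes combiner in stream_source_info::hash is cheap but collides
for symmetric keys (XOR is commutative); that is acceptable here since
equal_to still resolves lookups exactly.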
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 13b928c23c4..f96efaa0174 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -80,7 +80,7 @@ namespace { rmm::device_buffer decompress_stripe_data( chunk const& load_stripe_chunk, chunk const& stripe_chunk, - stream_id_map const& compinfo_map, + stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, host_span stream_info, From 7a04022a3ac3b2e9830e3fc2e99092bf84cef20d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 21:31:19 -0800 Subject: [PATCH 203/321] Change data type Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 36 ++++++++++++------------- cpp/src/io/orc/reader_impl_chunking.hpp | 23 ++++++++-------- cpp/src/io/orc/reader_impl_decode.cu | 10 ++++--- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 2bacbf6e72e..ab0c5171f08 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -68,7 +68,7 @@ namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( - int64_t stripe_processing_order, + std::size_t stripe_processing_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -175,8 +175,7 @@ std::size_t gather_stream_info_and_column_desc( stripeinfo->offset + src_offset, dst_offset, stream.length, - stream_source_info{ - static_cast(stripe_processing_order), level, column_id, stream.kind}); + stream_source_info{stripe_processing_order, level, column_id, stream.kind}); } dst_offset += stream.length; @@ -193,7 +192,9 @@ std::size_t gather_stream_info_and_column_desc( * given `size_limit`. 
*/ template -std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit) +std::vector find_splits(host_span sizes, + std::size_t total_count, + std::size_t size_limit) { // if (size_limit == 0) { // printf("0 limit: output chunk = 0, %d\n", (int)total_count); @@ -202,7 +203,7 @@ std::vector find_splits(host_span sizes, int64_t total_count, si CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); std::vector splits; - int64_t cur_count{0}; + std::size_t cur_count{0}; int64_t cur_pos{0}; size_t cur_cumulative_size{0}; @@ -242,7 +243,7 @@ std::vector find_splits(host_span sizes, int64_t total_count, si auto const start_idx = cur_count; cur_count = sizes[split_pos].count; - splits.emplace_back(chunk{start_idx, static_cast(cur_count - start_idx)}); + splits.emplace_back(chunk{start_idx, cur_count - start_idx}); cur_pos = split_pos; cur_cumulative_size = sizes[split_pos].size_bytes; @@ -266,10 +267,10 @@ std::vector find_splits(host_span sizes, int64_t total_count, si } template std::vector find_splits(host_span sizes, - int64_t total_count, - size_t size_limit); + std::size_t total_count, + std::size_t size_limit); template std::vector find_splits( - host_span sizes, int64_t total_count, size_t size_limit); + host_span sizes, std::size_t total_count, std::size_t size_limit); #endif /** @@ -400,7 +401,7 @@ void reader::impl::global_preprocess(read_mode mode) auto const stripe_footer = stripe.stripe_footer; std::size_t total_stripe_size{0}; - auto const last_read_size = static_cast(read_info.size()); + auto const last_read_size = read_info.size(); for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; @@ -428,10 +429,8 @@ void reader::impl::global_preprocess(read_mode mode) stripe_sizes[stripe_idx] = stripe_size; total_stripe_size += stripe_size; - auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - stripe_stream_chunks[stripe_idx] = - chunk{static_cast(stream_count), - static_cast(stream_info.size() - stream_count)}; + auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + stripe_stream_chunks[stripe_idx] = chunk{stream_count, stream_info.size() - stream_count}; // Coalesce consecutive streams into one read while (not is_stripe_data_empty and stream_count < stream_info.size()) { @@ -448,9 +447,8 @@ void reader::impl::global_preprocess(read_mode mode) read_info.emplace_back(offset, len, d_dst, stripe.source_idx, stripe_idx, level); } } - total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; - stripe_data_read_chunks[stripe_idx] = - chunk{last_read_size, static_cast(read_info.size() - last_read_size)}; + total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; + stripe_data_read_chunks[stripe_idx] = chunk{last_read_size, read_info.size() - last_read_size}; } _chunk_read_data.curr_load_stripe_chunk = 0; @@ -461,7 +459,7 @@ void reader::impl::global_preprocess(read_mode mode) printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); #endif - _chunk_read_data.load_stripe_chunks = {chunk{0, static_cast(num_stripes)}}; + _chunk_read_data.load_stripe_chunks = {chunk{0ul, num_stripes}}; return; } @@ -591,7 +589,7 @@ void reader::impl::load_data() cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, _stream); - for (int64_t stripe_idx = 0; stripe_idx < stripe_chunk.count; ++stripe_idx) { + for (std::size_t stripe_idx = 0; stripe_idx < stripe_chunk.count; ++stripe_idx) { auto const& stripe = 
selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 371509c5eb4..9e70ec246a1 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -33,11 +33,10 @@ namespace cudf::io::orc::detail { * @brief Struct that store identification of an ORC streams */ struct stream_source_info { - uint32_t stripe_idx; // global stripe id throughout all data sources - // TODO: change type below - std::size_t level; // level of the nested column - uint32_t orc_col_idx; // orc column id - StreamKind kind; // stream kind + std::size_t stripe_idx; // global stripe id throughout all data sources + std::size_t level; // level of the nested column + uint32_t orc_col_idx; // orc column id + StreamKind kind; // stream kind struct hash { std::size_t operator()(stream_source_info const& id) const @@ -98,8 +97,8 @@ struct stripe_level_comp_info { * @brief Struct that store information about a chunk of data. */ struct chunk { - int64_t start_idx; - int64_t count; + std::size_t start_idx; + std::size_t count; }; /** @@ -247,13 +246,13 @@ struct chunk_read_data { * @brief Struct to accumulate sizes of chunks of some data such as stripe or rows. */ struct cumulative_size { - int64_t count{0}; + std::size_t count{0}; std::size_t size_bytes{0}; }; // TODO struct cumulative_size_and_row { - int64_t count{0}; + std::size_t count{0}; std::size_t size_bytes{0}; std::size_t rows{0}; }; @@ -279,7 +278,9 @@ struct cumulative_size_sum { * given `size_limit`. */ template -std::vector find_splits(host_span sizes, int64_t total_count, size_t size_limit); +std::vector find_splits(host_span sizes, + std::size_t total_count, + std::size_t size_limit); // TODO std::pair get_range(std::vector const& input_chunks, @@ -290,7 +291,7 @@ std::pair get_range(std::vector const& input_chunks, * data, but not both. */ std::size_t gather_stream_info_and_column_desc( - int64_t stripe_processing_order, + std::size_t stripe_processing_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index f96efaa0174..dba2a4b135e 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -787,7 +787,8 @@ std::vector find_table_splits(table_view const& input, auto const current_length = cuda::std::min(segment_length, num_rows - segment_length * segment_idx); auto const size = d_sizes[segment_idx]; - return cumulative_size{current_length, static_cast(size)}; + return cumulative_size{static_cast(current_length), + static_cast(size)}; }); #ifdef LOCAL_TEST @@ -1262,7 +1263,7 @@ void reader::impl::decompress_and_decode() // TODO: only reset each one if the new size/type are different. 
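// [Editor's note -- annotation, not part of the patch] Once decompression has
// run, the raw (still-compressed) buffers for this decode range are no longer
// needed: the first slot below is overwritten with the decompressed buffer and
// the remaining slots are assigned empty buffers, releasing their device
// memory early.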
stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); - for (int64_t i = 1; i < stripe_chunk.count; ++i) { + for (std::size_t i = 1; i < stripe_chunk.count; ++i) { stripe_data[i + stripe_start - load_stripe_start] = {}; } @@ -1467,7 +1468,7 @@ void reader::impl::decompress_and_decode() if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { stripe_data[stripe_start - load_stripe_start] = {}; } else { - for (int64_t i = 0; i < stripe_chunk.count; ++i) { + for (std::size_t i = 0; i < stripe_chunk.count; ++i) { stripe_data[i + stripe_start - load_stripe_start] = {}; } } @@ -1491,7 +1492,8 @@ void reader::impl::decompress_and_decode() _chunk_read_data.curr_output_table_chunk = 0; _chunk_read_data.output_table_chunks = _chunk_read_data.output_size_limit == 0 - ? std::vector{chunk{0, _chunk_read_data.decoded_table->num_rows()}} + ? std::vector{chunk{ + 0, static_cast(_chunk_read_data.decoded_table->num_rows())}} : find_table_splits(_chunk_read_data.decoded_table->view(), _chunk_read_data.output_row_granularity, _chunk_read_data.output_size_limit, From bcdfab89a1066c78e9dfd8744ccaee1d08f7c818 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 21:56:19 -0800 Subject: [PATCH 204/321] Change from chunk to range Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 15 ++-- cpp/src/io/orc/reader_impl_chunking.cu | 107 ++++++++++++------------ cpp/src/io/orc/reader_impl_chunking.hpp | 41 ++++----- cpp/src/io/orc/reader_impl_decode.cu | 60 +++++++------ 4 files changed, 105 insertions(+), 118 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 51bbd47d690..57770cec4fe 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -129,8 +129,8 @@ table_with_metadata reader::impl::make_output_chunk() } auto out_table = [&] { - if (_chunk_read_data.output_table_chunks.size() == 1) { - _chunk_read_data.curr_output_table_chunk++; + if (_chunk_read_data.output_table_ranges.size() == 1) { + _chunk_read_data.curr_output_table_range++; #ifdef LOCAL_TEST printf("one chunk, no more table---------------------------------\n"); #endif @@ -147,12 +147,11 @@ table_with_metadata reader::impl::make_output_chunk() #endif auto const out_chunk = - _chunk_read_data.output_table_chunks[_chunk_read_data.curr_output_table_chunk++]; - auto const out_tview = - cudf::detail::slice(_chunk_read_data.decoded_table->view(), - {static_cast(out_chunk.start_idx), - static_cast(out_chunk.start_idx + out_chunk.count)}, - _stream)[0]; + _chunk_read_data.output_table_ranges[_chunk_read_data.curr_output_table_range++]; + auto const out_tview = cudf::detail::slice( + _chunk_read_data.decoded_table->view(), + {static_cast(out_chunk.begin), static_cast(out_chunk.end)}, + _stream)[0]; #ifdef LOCAL_TEST { diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ab0c5171f08..0333492d1c7 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -192,7 +192,7 @@ std::size_t gather_stream_info_and_column_desc( * given `size_limit`. 
*/ template -std::vector find_splits(host_span sizes, +std::vector find_splits(host_span sizes, std::size_t total_count, std::size_t size_limit) { @@ -202,7 +202,7 @@ std::vector find_splits(host_span sizes, // } CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); - std::vector splits; + std::vector splits; std::size_t cur_count{0}; int64_t cur_pos{0}; size_t cur_cumulative_size{0}; @@ -241,9 +241,13 @@ std::vector find_splits(host_span sizes, split_pos++; } + // #ifdef LOCAL_TEST + // printf(" split_pos: %d\n", (int)split_pos); + // #endif + auto const start_idx = cur_count; cur_count = sizes[split_pos].count; - splits.emplace_back(chunk{start_idx, cur_count - start_idx}); + splits.emplace_back(range{start_idx, cur_count}); cur_pos = split_pos; cur_cumulative_size = sizes[split_pos].size_bytes; @@ -257,45 +261,39 @@ std::vector find_splits(host_span sizes, if (splits.size() > 1) { auto constexpr merge_threshold = 0.15; if (auto const last = splits.back(), second_last = splits[splits.size() - 2]; - last.count <= static_cast(merge_threshold * second_last.count)) { + (last.end - last.begin) <= + static_cast(merge_threshold * (second_last.end - second_last.begin))) { splits.pop_back(); - splits.back().count += last.count; + splits.back().end = last.end; } } return splits; } -template std::vector find_splits(host_span sizes, +template std::vector find_splits(host_span sizes, std::size_t total_count, std::size_t size_limit); -template std::vector find_splits( +template std::vector find_splits( host_span sizes, std::size_t total_count, std::size_t size_limit); #endif /** - * @brief Find range of the data span by a given chunk of chunks. + * @brief Find range of the data span by a given range of ranges. * - * @param input_chunks The list of all data chunks - * @param selected_chunks A chunk of chunks in the input_chunks - * @return The range of data span by the selected chunk of given chunks + * @param input_ranges The list of all data chunks + * @param selected_ranges A chunk of chunks in the input_chunks + * @return The range of data span by the selected range of given chunks */ -std::pair get_range(std::vector const& input_chunks, - chunk const& selected_chunks) +std::pair get_range(std::vector const& input_ranges, + range const& selected_ranges) { - // Range indices to input_chunks - auto const chunk_begin = selected_chunks.start_idx; - auto const chunk_end = selected_chunks.start_idx + selected_chunks.count; - - // The first and last chunk, according to selected_chunk. - auto const& first_chunk = input_chunks[chunk_begin]; - auto const& last_chunk = input_chunks[chunk_end - 1]; - - // The range of data covered from the first to the last chunk. - auto const begin = first_chunk.start_idx; - auto const end = last_chunk.start_idx + last_chunk.count; + // The first and last range, according to selected_chunk. + auto const& first_range = input_ranges[selected_ranges.begin]; + auto const& last_range = input_ranges[selected_ranges.end - 1]; - return {begin, end}; + // The range of data covered from the first to the last range. 
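// [Editor's note -- worked example, not part of the patch] With, say,
// input_ranges = {{0, 3}, {3, 7}, {7, 9}} (the stream ranges of three stripes)
// and selected_ranges = {1, 3} (stripes 1..2), the return below yields {3, 9}:
// every stream belonging to the selected stripes.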
+ return {first_range.begin, last_range.end}; } void reader::impl::global_preprocess(read_mode mode) @@ -337,8 +335,8 @@ void reader::impl::global_preprocess(read_mode mode) lvl_stripe_sizes.resize(_selected_columns.num_levels()); auto& read_info = _file_itm_data.data_read_info; - auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_ranges; // Logically view streams as columns _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); @@ -430,7 +428,7 @@ void reader::impl::global_preprocess(read_mode mode) total_stripe_size += stripe_size; auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - stripe_stream_chunks[stripe_idx] = chunk{stream_count, stream_info.size() - stream_count}; + stripe_stream_chunks[stripe_idx] = range{stream_count, stream_info.size()}; // Coalesce consecutive streams into one read while (not is_stripe_data_empty and stream_count < stream_info.size()) { @@ -448,10 +446,10 @@ void reader::impl::global_preprocess(read_mode mode) } } total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; - stripe_data_read_chunks[stripe_idx] = chunk{last_read_size, read_info.size() - last_read_size}; + stripe_data_read_chunks[stripe_idx] = range{last_read_size, read_info.size()}; } - _chunk_read_data.curr_load_stripe_chunk = 0; + _chunk_read_data.curr_load_stripe_range = 0; // Load all chunks if there is no read limit. if (_chunk_read_data.data_read_limit == 0) { @@ -459,7 +457,7 @@ void reader::impl::global_preprocess(read_mode mode) printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); #endif - _chunk_read_data.load_stripe_chunks = {chunk{0ul, num_stripes}}; + _chunk_read_data.load_stripe_ranges = {range{0ul, num_stripes}}; return; } @@ -499,14 +497,14 @@ void reader::impl::global_preprocess(read_mode mode) chunk_read_data::load_limit_ratio); return tmp > 0UL ? tmp : 1UL; }(); - _chunk_read_data.load_stripe_chunks = + _chunk_read_data.load_stripe_ranges = find_splits(total_stripe_sizes, num_stripes, load_limit); #ifdef LOCAL_TEST - auto& splits = _chunk_read_data.load_stripe_chunks; + auto& splits = _chunk_read_data.load_stripe_ranges; printf("------------\nSplits (/total num stripe = %d): \n", (int)num_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { - printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); } fflush(stdout); #endif @@ -525,9 +523,10 @@ void reader::impl::load_data() // std::size_t num_stripes = selected_stripes.size(); auto const stripe_chunk = - _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk++]; - auto const stripe_start = stripe_chunk.start_idx; - auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range++]; + auto const stripe_start = stripe_chunk.begin; + auto const stripe_end = stripe_chunk.end; + auto const stripe_count = stripe_end - stripe_start; #ifdef LOCAL_TEST printf("\n\nloading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); @@ -537,7 +536,7 @@ void reader::impl::load_data() // TODO: clear all old buffer. 
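// [Editor's note -- annotation, not part of the patch] lvl_stripe_data is
// recycled between load ranges: the resize below re-targets it at just the
// stripes of the current range, and each slot is then overwritten with a
// fresh allocation sized for its stripe, at which point the previous range's
// buffers are released. The TODO presumably refers to releasing them up front
// instead of at overwrite time.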
for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stripe_data = lvl_stripe_data[level]; - stripe_data.resize(stripe_chunk.count); + stripe_data.resize(stripe_count); auto& stripe_sizes = lvl_stripe_sizes[level]; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { @@ -550,7 +549,7 @@ void reader::impl::load_data() std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; - auto const& stripe_data_read_chunks = _file_itm_data.stripe_data_read_chunks; + auto const& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; auto const [read_begin, read_end] = get_range(stripe_data_read_chunks, stripe_chunk); for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { @@ -581,15 +580,15 @@ void reader::impl::load_data() CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_ranges; // TODO: This is subpass // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. stream_source_map stream_compinfo_map; - cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_chunk.count, + cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); - for (std::size_t stripe_idx = 0; stripe_idx < stripe_chunk.count; ++stripe_idx) { + for (std::size_t stripe_idx = 0; stripe_idx < stripe_count; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; @@ -659,7 +658,7 @@ void reader::impl::load_data() compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; - stripe_decomp_sizes[stream_id.stripe_idx - stripe_chunk.start_idx].size_bytes += + stripe_decomp_sizes[stream_id.stripe_idx - stripe_start].size_bytes += stream_compinfo->max_uncompressed_size; #ifdef LOCAL_TEST @@ -687,8 +686,7 @@ void reader::impl::load_data() // Set decompression size equal to the input size. for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - stripe_decomp_sizes[info.source.stripe_idx - stripe_chunk.start_idx].size_bytes += - info.length; + stripe_decomp_sizes[info.source.stripe_idx - stripe_start].size_bytes += info.length; } } @@ -697,7 +695,7 @@ void reader::impl::load_data() } // end loop level // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. - _chunk_read_data.curr_decode_stripe_chunk = 0; + _chunk_read_data.curr_decode_stripe_range = 0; // Decode all chunks if there is no read and no output limit. // In theory, we should just decode enough stripes for output one table chunk. @@ -715,7 +713,7 @@ void reader::impl::load_data() printf("0 limit: output decode stripe chunk unchanged\n"); #endif - _chunk_read_data.decode_stripe_chunks = {stripe_chunk}; + _chunk_read_data.decode_stripe_ranges = {stripe_chunk}; return; } @@ -764,17 +762,18 @@ void reader::impl::load_data() (1.0 - chunk_read_data::load_limit_ratio)); return tmp > 0UL ? 
tmp : 1UL; }(); - _chunk_read_data.decode_stripe_chunks = - find_splits(stripe_decomp_sizes, stripe_chunk.count, decode_limit); - for (auto& chunk : _chunk_read_data.decode_stripe_chunks) { - chunk.start_idx += stripe_chunk.start_idx; + _chunk_read_data.decode_stripe_ranges = + find_splits(stripe_decomp_sizes, stripe_count, decode_limit); + for (auto& chunk : _chunk_read_data.decode_stripe_ranges) { + chunk.begin += stripe_start; + chunk.end += stripe_start; } #ifdef LOCAL_TEST - auto& splits = _chunk_read_data.decode_stripe_chunks; - printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_chunk.count); + auto& splits = _chunk_read_data.decode_stripe_ranges; + printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_count); for (size_t idx = 0; idx < splits.size(); idx++) { - printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); } fflush(stdout); #endif diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 9e70ec246a1..ff54d542a2b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -92,21 +92,12 @@ struct stripe_level_comp_info { std::size_t total_decomp_size{0}; }; -// TODO: remove this and use range instead -/** - * @brief Struct that store information about a chunk of data. - */ -struct chunk { - std::size_t start_idx; - std::size_t count; -}; - /** * @brief Struct that store information about a range of data. */ struct range { - int64_t begin; - int64_t end; + std::size_t begin; + std::size_t end; }; /** @@ -166,14 +157,14 @@ struct file_intermediate_data { // For each stripe, we perform a number of read for its streams. // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. - std::vector stripe_data_read_chunks; + std::vector stripe_data_read_ranges; // Store info for each ORC stream at each nested level. std::vector> lvl_stream_info; // At each nested level, the streams for each stripe are stored consecutively in lvl_stream_info. // This is used to identify the range of streams for each stripe from that vector. - std::vector> lvl_stripe_stream_chunks; + std::vector> lvl_stripe_stream_ranges; // TODO rename std::vector>> null_count_prefix_sums; @@ -207,25 +198,25 @@ struct chunk_read_data { // Chunks of stripes that can be load into memory such that their data size is within a size // limit. - std::vector load_stripe_chunks; - std::size_t curr_load_stripe_chunk{0}; - bool more_stripe_to_load() const { return curr_load_stripe_chunk < load_stripe_chunks.size(); } + std::vector load_stripe_ranges; + std::size_t curr_load_stripe_range{0}; + bool more_stripe_to_load() const { return curr_load_stripe_range < load_stripe_ranges.size(); } // Chunks of stripes such that their decompression size is within a size limit. - std::vector decode_stripe_chunks; - std::size_t curr_decode_stripe_chunk{0}; + std::vector decode_stripe_ranges; + std::size_t curr_decode_stripe_range{0}; bool more_stripe_to_decode() const { - return curr_decode_stripe_chunk < decode_stripe_chunks.size(); + return curr_decode_stripe_range < decode_stripe_ranges.size(); } // Chunk of rows in the internal decoded table to output for each `read_chunk()`. 
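// [Editor's note -- annotation, not part of the patch] chunk_read_data thus
// tracks three nested levels of progress: load_stripe_ranges (stripes whose
// raw on-disk size fits the load budget), decode_stripe_ranges (stripes whose
// decompressed size fits the decode budget), and the output table ranges below
// (row slices of the decoded table kept under output_size_limit). With a
// hypothetical data_read_limit of 100 MB and the load_limit_ratio of 0.4 seen
// earlier, loading targets roughly 40 MB of raw data per range while
// decompression and decoding target the remaining ~60 MB.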
- std::vector output_table_chunks; - std::size_t curr_output_table_chunk{0}; + std::vector output_table_ranges; + std::size_t curr_output_table_range{0}; std::unique_ptr decoded_table; bool more_table_chunk_to_output() const { - return curr_output_table_chunk < output_table_chunks.size(); + return curr_output_table_range < output_table_ranges.size(); } // Only has more chunk to output if: @@ -278,13 +269,13 @@ struct cumulative_size_sum { * given `size_limit`. */ template -std::vector find_splits(host_span sizes, +std::vector find_splits(host_span sizes, std::size_t total_count, std::size_t size_limit); // TODO -std::pair get_range(std::vector const& input_chunks, - chunk const& selected_chunks); +std::pair get_range(std::vector const& input_ranges, + range const& selected_ranges); /** * @brief Function that populates descriptors for either individual streams or chunks of column diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index dba2a4b135e..87fdc40b351 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -78,8 +78,8 @@ namespace { * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( - chunk const& load_stripe_chunk, - chunk const& stripe_chunk, + range const& load_stripe_range, + range const& stripe_range, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -101,8 +101,7 @@ rmm::device_buffer decompress_stripe_data( // TODO: use lvl_stripe_stream_chunks std::size_t count{0}; for (auto const& info : stream_info) { - if (info.source.stripe_idx < stripe_chunk.start_idx || - info.source.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + if (info.source.stripe_idx < stripe_range.begin || info.source.stripe_idx >= stripe_range.end) { continue; } count++; @@ -111,8 +110,7 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo(0, count, stream); for (auto const& info : stream_info) { - if (info.source.stripe_idx < stripe_chunk.start_idx || - info.source.stripe_idx >= stripe_chunk.start_idx + stripe_chunk.count) { + if (info.source.stripe_idx < stripe_range.begin || info.source.stripe_idx >= stripe_range.end) { continue; } @@ -129,7 +127,7 @@ rmm::device_buffer decompress_stripe_data( compinfo.push_back(gpu::CompressedStreamInfo( static_cast( - stripe_data[info.source.stripe_idx - load_stripe_chunk.start_idx].data()) + + stripe_data[info.source.stripe_idx - load_stripe_range.begin].data()) + info.dst_pos, info.length)); @@ -749,7 +747,7 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ * @param stream * @return */ -std::vector find_table_splits(table_view const& input, +std::vector find_table_splits(table_view const& input, size_type segment_length, std::size_t size_limit, rmm::cuda_stream_view stream) @@ -825,12 +823,13 @@ void reader::impl::decompress_and_decode() if (_file_itm_data.has_no_data()) { return; } auto const stripe_chunk = - _chunk_read_data.decode_stripe_chunks[_chunk_read_data.curr_decode_stripe_chunk++]; - auto const stripe_start = stripe_chunk.start_idx; - auto const stripe_end = stripe_chunk.start_idx + stripe_chunk.count; + _chunk_read_data.decode_stripe_ranges[_chunk_read_data.curr_decode_stripe_range++]; + auto const stripe_start = stripe_chunk.begin; + auto const stripe_end = stripe_chunk.end; + auto const stripe_count = stripe_chunk.end - stripe_chunk.begin; auto const load_stripe_start = - 
_chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1].start_idx; + _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1].begin; #ifdef LOCAL_TEST printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); @@ -903,7 +902,6 @@ void reader::impl::decompress_and_decode() // // TODO: move this to reader_impl.cu, decomp and decode step // std::size_t num_stripes = selected_stripes.size(); - std::size_t num_stripes = stripe_chunk.count; // Iterates through levels of nested columns, child column will be one level down // compared to parent column. @@ -967,7 +965,7 @@ void reader::impl::decompress_and_decode() #endif - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_chunks; + auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { #ifdef LOCAL_TEST @@ -981,8 +979,8 @@ void reader::impl::decompress_and_decode() } #endif - auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); + auto const& stripe_stream_ranges = lvl_stripe_stream_ranges[level]; + auto const [stream_begin, stream_end] = get_range(stripe_stream_ranges, stripe_chunk); auto& columns_level = _selected_columns.levels[level]; @@ -1019,7 +1017,7 @@ void reader::impl::decompress_and_decode() auto const num_columns = columns_level.size(); auto& chunks = lvl_chunks[level]; - chunks = cudf::detail::hostdevice_2dvector(num_stripes, num_columns, _stream); + chunks = cudf::detail::hostdevice_2dvector(stripe_count, num_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); #ifdef LOCAL_TEST @@ -1038,7 +1036,7 @@ void reader::impl::decompress_and_decode() // Only use if we don't have much work with complete columns & stripes // TODO: Consider nrows, gpu, and tune the threshold (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && - _metadata.get_row_index_stride() != 0 && num_columns * num_stripes < 8 * 128) && + _metadata.get_row_index_stride() != 0 && num_columns * stripe_count < 8 * 128) && // Only use if first row is aligned to a stripe boundary // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); @@ -1056,7 +1054,7 @@ void reader::impl::decompress_and_decode() _selected_columns.levels[level].size(), [&]() { return cudf::detail::make_zeroed_device_uvector_async( - num_stripes, _stream, rmm::mr::get_current_device_resource()); + stripe_count, _stream, rmm::mr::get_current_device_resource()); }); // Tracker for eventually deallocating compressed and uncompressed data @@ -1234,7 +1232,7 @@ void reader::impl::decompress_and_decode() // printf("decompress----------------------\n"); // printf("line %d\n", __LINE__); // fflush(stdout); - CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_chunk > 0, "ERRRRR"); + CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "ERRRRR"); #ifdef LOCAL_TEST { @@ -1246,7 +1244,7 @@ void reader::impl::decompress_and_decode() #endif auto decomp_data = decompress_stripe_data( - _chunk_read_data.load_stripe_chunks[_chunk_read_data.curr_load_stripe_chunk - 1], + _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1], stripe_chunk, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, @@ -1254,7 +1252,7 @@ void reader::impl::decompress_and_decode() stream_info, chunks, row_groups, - num_stripes, + stripe_count, 
_metadata.get_row_index_stride(), level == 0, _stream); @@ -1263,7 +1261,7 @@ void reader::impl::decompress_and_decode() // TODO: only reset each one if the new size/type are different. stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); - for (std::size_t i = 1; i < stripe_chunk.count; ++i) { + for (std::size_t i = 1; i < stripe_count; ++i) { stripe_data[i + stripe_start - load_stripe_start] = {}; } @@ -1292,7 +1290,7 @@ void reader::impl::decompress_and_decode() nullptr, chunks.base_device_ptr(), num_columns, - num_stripes, + stripe_count, _metadata.get_row_index_stride(), level == 0, _stream); @@ -1326,7 +1324,7 @@ void reader::impl::decompress_and_decode() for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; - for (std::size_t j = 0; j < num_stripes; ++j) { + for (std::size_t j = 0; j < stripe_count; ++j) { if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { #ifdef LOCAL_TEST printf(" is nullable\n"); @@ -1468,7 +1466,7 @@ void reader::impl::decompress_and_decode() if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { stripe_data[stripe_start - load_stripe_start] = {}; } else { - for (std::size_t i = 0; i < stripe_chunk.count; ++i) { + for (std::size_t i = 0; i < stripe_count; ++i) { stripe_data[i + stripe_start - load_stripe_start] = {}; } } @@ -1489,10 +1487,10 @@ void reader::impl::decompress_and_decode() // DEBUG only // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3; - _chunk_read_data.curr_output_table_chunk = 0; - _chunk_read_data.output_table_chunks = + _chunk_read_data.curr_output_table_range = 0; + _chunk_read_data.output_table_ranges = _chunk_read_data.output_size_limit == 0 - ? std::vector{chunk{ + ? std::vector{range{ 0, static_cast(_chunk_read_data.decoded_table->num_rows())}} : find_table_splits(_chunk_read_data.decoded_table->view(), _chunk_read_data.output_row_granularity, @@ -1500,11 +1498,11 @@ void reader::impl::decompress_and_decode() _stream); #ifdef LOCAL_TEST - auto& splits = _chunk_read_data.output_table_chunks; + auto& splits = _chunk_read_data.output_table_ranges; printf("------------\nSplits decoded table (/total num rows = %d): \n", (int)_chunk_read_data.decoded_table->num_rows()); for (size_t idx = 0; idx < splits.size(); idx++) { - printf("{%ld, %ld}\n", splits[idx].start_idx, splits[idx].count); + printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); } fflush(stdout); From 98d82fc728cff400657509ef4eebf4e42c2ea732 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 8 Mar 2024 22:30:46 -0800 Subject: [PATCH 205/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- cpp/src/io/orc/reader_impl_chunking.hpp | 31 +++++++++++-------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 0333492d1c7..b54a4483523 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -442,7 +442,7 @@ void reader::impl::global_preprocess(read_mode mode) len += stream_info[stream_count].length; stream_count++; } - read_info.emplace_back(offset, len, d_dst, stripe.source_idx, stripe_idx, level); + read_info.emplace_back(offset, d_dst, len, stripe.source_idx, stripe_idx, level); } } total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index ff54d542a2b..5731bd242d9 100644 --- 
a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -24,13 +24,12 @@ #include #include -#include #include namespace cudf::io::orc::detail { /** - * @brief Struct that store identification of an ORC streams + * @brief Struct that store source information of an ORC streams. */ struct stream_source_info { std::size_t stripe_idx; // global stripe id throughout all data sources @@ -57,14 +56,14 @@ struct stream_source_info { }; /** - * @brief Map to lookup a value from stream id. + * @brief Map to lookup a value from stream source. */ template using stream_source_map = std::unordered_map; /** - * @brief Struct that store identification of an ORC stream. + * @brief Struct that store information of an ORC stream. */ struct orc_stream_info { explicit orc_stream_info(uint64_t offset_, @@ -79,7 +78,7 @@ struct orc_stream_info { std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data std::size_t length; // stream length to read - // Store location of the stream in the stripe, so we can look up where this stream comes from. + // Store source of the stream in the stripe, so we can look up where this stream comes from. stream_source_info source; }; @@ -101,7 +100,7 @@ struct range { }; /** - * @brief Struct to store file-level data that remains constant for all chunks being output. + * @brief Struct to store intermediate processing data loaded from data sources. */ struct file_intermediate_data { int64_t rows_to_skip; @@ -111,9 +110,6 @@ struct file_intermediate_data { // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } - // TODO: remove - std::size_t num_stripes() const { return selected_stripes.size(); } - // Store the compression information for each data stream. stream_source_map compinfo_map; @@ -129,26 +125,26 @@ struct file_intermediate_data { // Store information to identify where to read a chunk of data from source. // Each read corresponds to one or more consecutive streams combined. struct stream_data_read_info { - // TODO: remove constructor stream_data_read_info(uint64_t offset_, - std::size_t length_, std::size_t dst_pos_, + std::size_t length_, std::size_t source_idx_, std::size_t stripe_idx_, std::size_t level_) : offset(offset_), - length(length_), dst_pos(dst_pos_), + length(length_), source_idx(source_idx_), stripe_idx(stripe_idx_), level(level_) { } + uint64_t offset; // offset in data source std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data std::size_t length; // data length to read std::size_t source_idx; // the data source id - std::size_t stripe_idx; // stream id TODO: processing or source stripe id? + std::size_t stripe_idx; // global stripe index std::size_t level; // nested level }; @@ -190,10 +186,11 @@ struct chunk_read_data { } // TODO: const for 3 below? 
- std::size_t output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit - std::size_t data_read_limit; // approximate maximum size (in bytes) used for store - // intermediate data, or 0 for no limit - size_type output_row_granularity; // TODO + std::size_t const + output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit + std::size_t const data_read_limit; // approximate maximum size (in bytes) used for store + // intermediate data, or 0 for no limit + size_type const output_row_granularity; // TODO static double constexpr load_limit_ratio{0.4}; // TODO // Chunks of stripes that can be load into memory such that their data size is within a size From 1ec9dc0ef7cc0f10adc3602d3d75fd14b633f3f3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 9 Mar 2024 19:19:13 -0800 Subject: [PATCH 206/321] Cleanup and rename variable Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.hpp | 1 + cpp/src/io/orc/reader_impl_chunking.cu | 46 ++++++++----------------- cpp/src/io/orc/reader_impl_chunking.hpp | 45 ++++++++++++++++++------ 3 files changed, 50 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index ae518bc2a5f..b3f91e5e92a 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -212,6 +212,7 @@ class reader::impl { table_metadata _out_metadata; std::vector> _out_buffers; + // The default value used for subdividing the decoded table for final output. static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000; }; diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index b54a4483523..44a247c2405 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -84,8 +84,8 @@ std::size_t gather_stream_info_and_column_desc( CUDF_EXPECTS(stream_info.has_value() ^ chunks.has_value(), "Either stream_info or chunks must be provided, but not both."); - uint64_t src_offset = 0; - uint64_t dst_offset = 0; + std::size_t src_offset = 0; + std::size_t dst_offset = 0; auto const get_stream_index_type = [](orc::StreamKind kind) { switch (kind) { @@ -186,20 +186,11 @@ std::size_t gather_stream_info_and_column_desc( return dst_offset; } -#if 1 -/** - * @brief Find the splits of the input data such that each split has cumulative size less than a - * given `size_limit`. - */ template -std::vector find_splits(host_span sizes, +std::vector find_splits(host_span cumulative_sizes, std::size_t total_count, std::size_t size_limit) { - // if (size_limit == 0) { - // printf("0 limit: output chunk = 0, %d\n", (int)total_count); - // return {chunk{0, total_count}}; - // } CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); std::vector splits; @@ -210,8 +201,9 @@ std::vector find_splits(host_span sizes, [[maybe_unused]] size_t cur_cumulative_rows{0}; auto const start = thrust::make_transform_iterator( - sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); - auto const end = start + static_cast(sizes.size()); + cumulative_sizes.begin(), + [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); + auto const end = start + static_cast(cumulative_sizes.size()); while (cur_count < total_count) { int64_t split_pos = @@ -219,13 +211,13 @@ std::vector find_splits(host_span sizes, // If we're past the end, or if the returned bucket is bigger than the chunk_read_limit, move // back one. 
- if (static_cast(split_pos) >= sizes.size() || - (sizes[split_pos].size_bytes - cur_cumulative_size > size_limit)) { + if (static_cast(split_pos) >= cumulative_sizes.size() || + (cumulative_sizes[split_pos].size_bytes - cur_cumulative_size > size_limit)) { split_pos--; } if constexpr (std::is_same_v) { - while (split_pos > 0 && sizes[split_pos].rows - cur_cumulative_rows > + while (split_pos > 0 && cumulative_sizes[split_pos].rows - cur_cumulative_rows > static_cast(std::numeric_limits::max())) { split_pos--; } @@ -236,8 +228,8 @@ std::vector find_splits(host_span sizes, // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in // the list twice. so we have to iterate until we skip past all of them. The idea is that we // either do this, or we have to call unique() on the input first. - while (split_pos < (static_cast(sizes.size()) - 1) && - (split_pos < 0 || sizes[split_pos].count <= cur_count)) { + while (split_pos < (static_cast(cumulative_sizes.size()) - 1) && + (split_pos < 0 || cumulative_sizes[split_pos].count <= cur_count)) { split_pos++; } @@ -246,13 +238,13 @@ std::vector find_splits(host_span sizes, // #endif auto const start_idx = cur_count; - cur_count = sizes[split_pos].count; + cur_count = cumulative_sizes[split_pos].count; splits.emplace_back(range{start_idx, cur_count}); cur_pos = split_pos; - cur_cumulative_size = sizes[split_pos].size_bytes; + cur_cumulative_size = cumulative_sizes[split_pos].size_bytes; if constexpr (std::is_same_v) { - cur_cumulative_rows = sizes[split_pos].rows; + cur_cumulative_rows = cumulative_sizes[split_pos].rows; } } @@ -276,19 +268,11 @@ template std::vector find_splits(host_span find_splits( host_span sizes, std::size_t total_count, std::size_t size_limit); -#endif -/** - * @brief Find range of the data span by a given range of ranges. - * - * @param input_ranges The list of all data chunks - * @param selected_ranges A chunk of chunks in the input_chunks - * @return The range of data span by the selected range of given chunks - */ std::pair get_range(std::vector const& input_ranges, range const& selected_ranges) { - // The first and last range, according to selected_chunk. + // The first and last range. auto const& first_range = input_ranges[selected_ranges.begin]; auto const& last_range = input_ranges[selected_ranges.end - 1]; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 5731bd242d9..ba3611d5757 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -83,7 +83,7 @@ struct orc_stream_info { }; /** - * @brief Struct that store compression information for a stripe at a specific nested level. + * @brief Compression information for a stripe at a specific nested level. */ struct stripe_level_comp_info { std::size_t num_compressed_blocks{0}; @@ -92,7 +92,7 @@ struct stripe_level_comp_info { }; /** - * @brief Struct that store information about a range of data. + * @brief Struct representing a range of data. */ struct range { std::size_t begin; @@ -100,7 +100,7 @@ struct range { }; /** - * @brief Struct to store intermediate processing data loaded from data sources. + * @brief Struct storing intermediate processing data loaded from data sources. */ struct file_intermediate_data { int64_t rows_to_skip; @@ -173,7 +173,7 @@ struct file_intermediate_data { }; /** - * @brief Struct to store all data necessary for chunked reading. + * @brief Struct collecting data necessary for chunked reading. 
*/ struct chunk_read_data { explicit chunk_read_data(std::size_t output_size_limit_, @@ -231,14 +231,17 @@ struct chunk_read_data { }; /** - * @brief Struct to accumulate sizes of chunks of some data such as stripe or rows. + * @brief Struct to accumulate counts and sizes of some types such as stripes or rows. */ struct cumulative_size { std::size_t count{0}; std::size_t size_bytes{0}; }; -// TODO +/** + * @brief Struct to accumulate counts, sizes, and number of rows of some types such as stripes or + * rows in tables. + */ struct cumulative_size_and_row { std::size_t count{0}; std::size_t size_bytes{0}; @@ -246,7 +249,7 @@ struct cumulative_size_and_row { }; /** - * @brief Functor to sum up cumulative sizes. + * @brief Functor to sum up cumulative data. */ struct cumulative_size_sum { __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const @@ -262,21 +265,41 @@ struct cumulative_size_sum { }; /** - * @brief Find the splits of the input data such that each split has cumulative size less than a - * given `size_limit`. + * @brief Find the splits of the input data such that each split range has cumulative size less than + * a given `size_limit`. + * + * Note that the given limit is just a soft limit. The function will always output ranges that + * have at least one count, even such ranges have sizes exceed the value of `size_limit`. + * + * @param cumulative_sizes The input cumulative sizes to compute split ranges + * @param total_count The total count in the entire input + * @param size_limit The given soft limit to compute splits + * @return A vector of ranges as splits of the input */ template -std::vector find_splits(host_span sizes, +std::vector find_splits(host_span cumulative_sizes, std::size_t total_count, std::size_t size_limit); -// TODO +/** + * @brief Expand a range of ranges into a simple range of data. + * + * @param input_ranges The list of all data ranges + * @param selected_ranges A range of ranges from `input_ranges` + * @return The range of data span by the selected range of ranges + */ std::pair get_range(std::vector const& input_ranges, range const& selected_ranges); /** * @brief Function that populates descriptors for either individual streams or chunks of column * data, but not both. + * + * This function is used in the global step, to gather information for streams of all stripes in + * the data sources (when `stream_info` is present). Later on, it is used again to populate column + * descriptors (`chunks` is present) during decompression and decoding. The two steps share + * most of the execution path thus this function takes mutually exclusive parameters `stream_info` + * or `chunks` depending on each use case. */ std::size_t gather_stream_info_and_column_desc( std::size_t stripe_processing_order, From 64c155aee1e5e0a9ee5fa273c980126e4f3812fa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 9 Mar 2024 20:19:27 -0800 Subject: [PATCH 207/321] Further cleanup and rename variable Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 65 +++++++++----------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 44a247c2405..4d815568e3d 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -105,12 +105,6 @@ std::size_t gather_stream_info_and_column_desc( if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { // Ignore reading this stream from source. 
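// [Editor's note -- annotation, not part of the patch] An orphaned stream --
// one whose column is absent from the orc2gdf mapping -- is skipped rather
// than treated as an error, but src_offset must still be advanced by the
// stream's length (see below) so the offsets computed for the remaining
// streams of the stripe stay correct.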
CUDF_LOG_WARN("Unexpected stream in the input ORC source. The stream will be ignored."); - -#ifdef LOCAL_TEST - printf("Unexpected stream in the input ORC source. The stream will be ignored\n"); - fflush(stdout); -#endif - src_offset += stream.length; continue; } @@ -142,14 +136,7 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[stripe_processing_order][col]; - // printf("use stream id: %d, stripe: %d, level: %d, col idx: %d, kind: %d\n", - // (int)(*stream_idx), - // (int)stripe_index, - // (int)level, - // (int)column_id, - // (int)stream.kind); - + auto& chunk = (*chunks.value())[stripe_processing_order][col]; chunk.strm_id[index_type] = *stream_processing_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams @@ -165,12 +152,6 @@ std::size_t gather_stream_info_and_column_desc( (*stream_processing_order)++; } else { // not chunks.has_value() - // printf("collect stream id: stripe: %d, level: %d, col idx: %d, kind: %d\n", - // (int)stripe_index, - // (int)level, - // (int)column_id, - // (int)stream.kind); - stream_info.value()->emplace_back( stripeinfo->offset + src_offset, dst_offset, @@ -196,50 +177,47 @@ std::vector find_splits(host_span cumulative_sizes, std::vector splits; std::size_t cur_count{0}; int64_t cur_pos{0}; - size_t cur_cumulative_size{0}; + std::size_t cur_cumulative_size{0}; - [[maybe_unused]] size_t cur_cumulative_rows{0}; + [[maybe_unused]] std::size_t cur_cumulative_rows{0}; auto const start = thrust::make_transform_iterator( cumulative_sizes.begin(), [&](auto const& size) { return size.size_bytes - cur_cumulative_size; }); - auto const end = start + static_cast(cumulative_sizes.size()); + auto const end = start + cumulative_sizes.size(); while (cur_count < total_count) { int64_t split_pos = thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit)); - // If we're past the end, or if the returned bucket is bigger than the chunk_read_limit, move - // back one. - if (static_cast(split_pos) >= cumulative_sizes.size() || - (cumulative_sizes[split_pos].size_bytes - cur_cumulative_size > size_limit)) { + // If we're past the end, or if the returned range has size exceeds the given size limit, + // move back one position. + if (split_pos >= static_cast(cumulative_sizes.size()) || + (cumulative_sizes[split_pos].size_bytes > cur_cumulative_size + size_limit)) { split_pos--; } if constexpr (std::is_same_v) { - while (split_pos > 0 && cumulative_sizes[split_pos].rows - cur_cumulative_rows > - static_cast(std::numeric_limits::max())) { + // Similarly, while the returned range has total number of rows exceeds column size limit, + // move back one position. + while (split_pos > 0 && cumulative_sizes[split_pos].rows > + cur_cumulative_rows + + static_cast(std::numeric_limits::max())) { split_pos--; } } - // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in - // a loop because all of the cumulative sizes for all the pages are sorted into one big list. - // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in - // the list twice. so we have to iterate until we skip past all of them. The idea is that we - // either do this, or we have to call unique() on the input first. 
+ // In case we have moved back too in the steps above, far beyond the last split point: that + // means we cannot find any range that has size fits within the given size limit. + // In such case, we need to move forward until we move pass the last output range. while (split_pos < (static_cast(cumulative_sizes.size()) - 1) && (split_pos < 0 || cumulative_sizes[split_pos].count <= cur_count)) { split_pos++; } - // #ifdef LOCAL_TEST - // printf(" split_pos: %d\n", (int)split_pos); - // #endif - - auto const start_idx = cur_count; - cur_count = cumulative_sizes[split_pos].count; - splits.emplace_back(range{start_idx, cur_count}); + auto const start_count = cur_count; + cur_count = cumulative_sizes[split_pos].count; + splits.emplace_back(range{start_count, cur_count}); cur_pos = split_pos; cur_cumulative_size = cumulative_sizes[split_pos].size_bytes; @@ -248,10 +226,11 @@ std::vector find_splits(host_span cumulative_sizes, } } - // If the last chunk has size smaller than `merge_threshold` percent of the second last one, + // If the last range has size smaller than `merge_threshold` percent of the second last one, // merge it with the second last one. + // This is to prevent having too small trailing range. if (splits.size() > 1) { - auto constexpr merge_threshold = 0.15; + double constexpr merge_threshold = 0.15; if (auto const last = splits.back(), second_last = splits[splits.size() - 2]; (last.end - last.begin) <= static_cast(merge_threshold * (second_last.end - second_last.begin))) { From 5b361fb89545fbfcc7ef97cea1008b5edad9d151 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 9 Mar 2024 20:53:42 -0800 Subject: [PATCH 208/321] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 97 ++++++++++++-------------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 4d815568e3d..5112a628b9f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -288,75 +288,65 @@ void reader::impl::global_preprocess(read_mode mode) } #endif - // auto const rows_to_skip = _file_itm_data.rows_to_skip; - // auto const rows_to_read = _file_itm_data.rows_to_read; - auto const& selected_stripes = _file_itm_data.selected_stripes; + auto const num_total_stripes = _file_itm_data.selected_stripes.size(); + auto const num_levels = _selected_columns.num_levels(); + +#ifdef LOCAL_TEST + printf("num load stripe: %d\n", (int)num_total_stripes); +#endif - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; - lvl_stripe_data.resize(_selected_columns.num_levels()); - lvl_stripe_sizes.resize(_selected_columns.num_levels()); + // + // Pre allocate necessary memory for data processed in the next steps: + // + auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; + stripe_data_read_chunks.resize(num_total_stripes); - auto& read_info = _file_itm_data.data_read_info; - auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; + auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_ranges; - // Logically view streams as columns - _file_itm_data.lvl_stream_info.resize(_selected_columns.num_levels()); - - // TODO: handle large number of stripes. 
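// A worked example of the trailing-range merge introduced above, under the same
// assumption of merge_threshold = 0.15: a final range of 2 stripes following a range of
// 20 stripes satisfies 2 <= 0.15 * 20 = 3, so it is folded into its predecessor.
#include <cstddef>
#include <vector>

struct rng { std::size_t begin; std::size_t end; };

inline void merge_small_tail(std::vector<rng>& splits, double merge_threshold)
{
  if (splits.size() > 1) {
    auto const last        = splits.back();
    auto const second_last = splits[splits.size() - 2];
    if (last.end - last.begin <=
        static_cast<std::size_t>(merge_threshold * (second_last.end - second_last.begin))) {
      splits.pop_back();
      splits.back().end = last.end;
    }
  }
}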
- // Get the total number of stripes across all input files. - auto const num_stripes = selected_stripes.size(); + lvl_stripe_data.resize(num_levels); + lvl_stripe_sizes.resize(num_levels); + lvl_stream_info.resize(num_levels); + lvl_stripe_stream_chunks.resize(num_levels); + _file_itm_data.lvl_data_chunks.resize(num_levels); + _out_buffers.resize(num_levels); -#ifdef LOCAL_TEST - printf("num load stripe: %d\n", (int)num_stripes); -#endif + auto& read_info = _file_itm_data.data_read_info; + auto& col_meta = *_col_meta; - stripe_data_read_chunks.resize(num_stripes); - lvl_stripe_stream_chunks.resize(_selected_columns.num_levels()); + for (std::size_t level = 0; level < num_levels; ++level) { + lvl_stripe_sizes[level].resize(num_total_stripes); + lvl_stripe_stream_chunks[level].resize(num_total_stripes); - // TODO: move this - auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - lvl_chunks.resize(_selected_columns.num_levels()); - _out_buffers.resize(_selected_columns.num_levels()); - - // TODO: Check if these data depends on pass and subpass, instead of global pass. - // Prepare data. - // Iterates through levels of nested columns, child column will be one level down - // compared to parent column. - auto& col_meta = *_col_meta; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& columns_level = _selected_columns.levels[level]; // Association between each ORC column and its cudf::column col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); size_type col_id{0}; - for (auto& col : columns_level) { + for (auto const& col : _selected_columns.levels[level]) { // Map each ORC column to its column col_meta.orc_col_map[level][col.id] = col_id++; } - // auto& stripe_data = lvl_stripe_data[level]; - // stripe_data.resize(num_stripes); - - auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); - auto& stripe_sizes = lvl_stripe_sizes[level]; - stream_info.reserve(selected_stripes.size() * num_columns); // final size is unknown - stripe_sizes.resize(selected_stripes.size()); - if (read_info.capacity() < selected_stripes.size()) { - read_info.reserve(selected_stripes.size() * num_columns); // final size is unknown + // Try to reserve some memory, but the final size is unknown, + // since each column may have more than one stream. + lvl_stream_info[level].reserve(num_total_stripes * num_columns); + if (read_info.capacity() < num_total_stripes * num_columns) { + read_info.reserve(num_total_stripes * num_columns); } - - auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - stripe_stream_chunks.resize(num_stripes); } - cudf::detail::hostdevice_vector total_stripe_sizes(num_stripes, _stream); + // + // Load all stripes' metadata. + // + cudf::detail::hostdevice_vector total_stripe_sizes(num_total_stripes, _stream); + auto const& selected_stripes = _file_itm_data.selected_stripes; // Compute input size for each stripe. 
- for (std::size_t stripe_idx = 0; stripe_idx < num_stripes; ++stripe_idx) { + for (std::size_t stripe_idx = 0; stripe_idx < num_total_stripes; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; @@ -412,15 +402,20 @@ void reader::impl::global_preprocess(read_mode mode) stripe_data_read_chunks[stripe_idx] = range{last_read_size, read_info.size()}; } + // + // Compute stripes' data sizes, and split list of all stripes into subsets that be loaded + // separately without blowing up memory: + // + _chunk_read_data.curr_load_stripe_range = 0; // Load all chunks if there is no read limit. if (_chunk_read_data.data_read_limit == 0) { #ifdef LOCAL_TEST - printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_stripes); + printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_total_stripes); #endif - _chunk_read_data.load_stripe_ranges = {range{0ul, num_stripes}}; + _chunk_read_data.load_stripe_ranges = {range{0ul, num_total_stripes}}; return; } @@ -461,11 +456,11 @@ void reader::impl::global_preprocess(read_mode mode) return tmp > 0UL ? tmp : 1UL; }(); _chunk_read_data.load_stripe_ranges = - find_splits(total_stripe_sizes, num_stripes, load_limit); + find_splits(total_stripe_sizes, num_total_stripes, load_limit); #ifdef LOCAL_TEST auto& splits = _chunk_read_data.load_stripe_ranges; - printf("------------\nSplits (/total num stripe = %d): \n", (int)num_stripes); + printf("------------\nSplits (/total num stripe = %d): \n", (int)num_total_stripes); for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); } From 1206ba1e21b9c4e0f62b32b6e510223c722a536c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 9 Mar 2024 21:04:57 -0800 Subject: [PATCH 209/321] Cleanup and rename variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 67 +++++++++++++------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5112a628b9f..62cfa73cb47 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -288,7 +288,8 @@ void reader::impl::global_preprocess(read_mode mode) } #endif - auto const num_total_stripes = _file_itm_data.selected_stripes.size(); + auto const& selected_stripes = _file_itm_data.selected_stripes; + auto const num_total_stripes = selected_stripes.size(); auto const num_levels = _selected_columns.num_levels(); #ifdef LOCAL_TEST @@ -343,23 +344,22 @@ void reader::impl::global_preprocess(read_mode mode) // Load all stripes' metadata. // cudf::detail::hostdevice_vector total_stripe_sizes(num_total_stripes, _stream); - auto const& selected_stripes = _file_itm_data.selected_stripes; - // Compute input size for each stripe. 
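// The read-coalescing loop in the hunk above, reduced to a standalone sketch: streams
// whose file offsets are back-to-back collapse into a single read request. For example,
// streams at (0, 100), (100, 50), (200, 30) become reads (0, 150) and (200, 30).
#include <cstddef>
#include <vector>

struct stream_desc { std::size_t offset; std::size_t length; };

inline std::vector<stream_desc> coalesce_reads(std::vector<stream_desc> const& streams)
{
  std::vector<stream_desc> reads;
  std::size_t i = 0;
  while (i < streams.size()) {
    auto const offset = streams[i].offset;
    auto len          = streams[i].length;
    ++i;
    // Extend the read while the next stream starts exactly where this one ends.
    while (i < streams.size() && streams[i].offset == offset + len) {
      len += streams[i].length;
      ++i;
    }
    reads.push_back(stream_desc{offset, len});
  }
  return reads;
}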
- for (std::size_t stripe_idx = 0; stripe_idx < num_total_stripes; ++stripe_idx) { - auto const& stripe = selected_stripes[stripe_idx]; + for (std::size_t stripe_global_idx = 0; stripe_global_idx < num_total_stripes; + ++stripe_global_idx) { + auto const& stripe = selected_stripes[stripe_global_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - std::size_t total_stripe_size{0}; + std::size_t stripe_size{0}; auto const last_read_size = read_info.size(); - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + for (std::size_t level = 0; level < num_levels; ++level) { auto& stream_info = _file_itm_data.lvl_stream_info[level]; auto& stripe_sizes = lvl_stripe_sizes[level]; - auto stream_count = stream_info.size(); - auto const stripe_size = - gather_stream_info_and_column_desc(stripe_idx, + auto stream_level_count = stream_info.size(); + auto const stripe_level_size = + gather_stream_info_and_column_desc(stripe_global_idx, level, stripe_info, stripe_footer, @@ -368,43 +368,42 @@ void reader::impl::global_preprocess(read_mode mode) false, // use_index, level == 0, nullptr, // num_dictionary_entries - nullptr, // stream_idx + nullptr, // stream_processing_order &stream_info, std::nullopt // chunks ); - auto const is_stripe_data_empty = stripe_size == 0; + auto const is_stripe_data_empty = stripe_level_size == 0; CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - stripe_sizes[stripe_idx] = stripe_size; - total_stripe_size += stripe_size; + stripe_sizes[stripe_global_idx] = stripe_level_size; + stripe_size += stripe_level_size; - auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - stripe_stream_chunks[stripe_idx] = range{stream_count, stream_info.size()}; + auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; + stripe_stream_chunks[stripe_global_idx] = range{stream_level_count, stream_info.size()}; // Coalesce consecutive streams into one read - while (not is_stripe_data_empty and stream_count < stream_info.size()) { - auto const d_dst = stream_info[stream_count].dst_pos; - auto const offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; - stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; - stream_count++; + while (not is_stripe_data_empty and stream_level_count < stream_info.size()) { + auto const d_dst = stream_info[stream_level_count].dst_pos; + auto const offset = stream_info[stream_level_count].offset; + auto len = stream_info[stream_level_count].length; + stream_level_count++; + + while (stream_level_count < stream_info.size() && + stream_info[stream_level_count].offset == offset + len) { + len += stream_info[stream_level_count].length; + stream_level_count++; } - read_info.emplace_back(offset, d_dst, len, stripe.source_idx, stripe_idx, level); + read_info.emplace_back(offset, d_dst, len, stripe.source_idx, stripe_global_idx, level); } } - total_stripe_sizes[stripe_idx] = {1, total_stripe_size}; - stripe_data_read_chunks[stripe_idx] = range{last_read_size, read_info.size()}; + total_stripe_sizes[stripe_global_idx] = {1, stripe_size}; + stripe_data_read_chunks[stripe_global_idx] = range{last_read_size, read_info.size()}; } // - // Compute stripes' data sizes, and split list of all stripes into subsets that be loaded - // separately without blowing up memory: + // Split list of all stripes 
into subsets that be loaded separately without blowing up memory: // _chunk_read_data.curr_load_stripe_range = 0; @@ -429,9 +428,9 @@ void reader::impl::global_preprocess(read_mode mode) } #endif - // Compute the prefix sum of stripe data sizes. + // Compute the prefix sum of stripes' data sizes. total_stripe_sizes.host_to_device_async(_stream); - thrust::inclusive_scan(rmm::exec_policy(_stream), + thrust::inclusive_scan(rmm::exec_policy(_stream), // todo no sync total_stripe_sizes.d_begin(), total_stripe_sizes.d_end(), total_stripe_sizes.d_begin(), @@ -449,10 +448,10 @@ void reader::impl::global_preprocess(read_mode mode) } #endif - // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits. auto const load_limit = [&] { auto const tmp = static_cast(_chunk_read_data.data_read_limit * chunk_read_data::load_limit_ratio); + // Make sure not to pass 0 byte limit (due to round-off) to compute splits. return tmp > 0UL ? tmp : 1UL; }(); _chunk_read_data.load_stripe_ranges = From 71386a2a1e5813d6be421f055f6bfc457635b841 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 9 Mar 2024 22:13:42 -0800 Subject: [PATCH 210/321] Cleanup heavily Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 78 ++++++++++++-------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 62cfa73cb47..44eb8b4b339 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -380,8 +380,8 @@ void reader::impl::global_preprocess(read_mode mode) stripe_sizes[stripe_global_idx] = stripe_level_size; stripe_size += stripe_level_size; - auto& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - stripe_stream_chunks[stripe_global_idx] = range{stream_level_count, stream_info.size()}; + lvl_stripe_stream_chunks[level][stripe_global_idx] = + range{stream_level_count, stream_info.size()}; // Coalesce consecutive streams into one read while (not is_stripe_data_empty and stream_level_count < stream_info.size()) { @@ -472,62 +472,58 @@ void reader::impl::load_data() { if (_file_itm_data.has_no_data()) { return; } - // auto const rows_to_read = _file_itm_data.rows_to_read; - auto const& selected_stripes = _file_itm_data.selected_stripes; - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; - auto& read_info = _file_itm_data.data_read_info; - - // std::size_t num_stripes = selected_stripes.size(); - auto const stripe_chunk = + auto const load_stripe_range = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range++]; - auto const stripe_start = stripe_chunk.begin; - auto const stripe_end = stripe_chunk.end; + auto const stripe_start = load_stripe_range.begin; + auto const stripe_end = load_stripe_range.end; auto const stripe_count = stripe_end - stripe_start; + auto const num_levels = _selected_columns.num_levels(); + #ifdef LOCAL_TEST printf("\n\nloading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); #endif + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + // Prepare the buffer to read raw data onto. - // TODO: clear all old buffer. 
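// The load-limit computation above, isolated: only a fraction of the user's read limit
// is budgeted for raw stripe loading, and a zero-byte limit (possible after round-off)
// must never reach find_splits. load_limit_ratio is assumed to lie in (0, 1].
#include <cstddef>

inline std::size_t compute_load_limit(std::size_t data_read_limit, double load_limit_ratio)
{
  auto const tmp = static_cast<std::size_t>(data_read_limit * load_limit_ratio);
  return tmp > 0 ? tmp : 1;
}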
for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto& stripe_data = lvl_stripe_data[level]; stripe_data.resize(stripe_count); - auto& stripe_sizes = lvl_stripe_sizes[level]; - for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { - // TODO: only do this if it was not allocated before. - stripe_data[stripe_idx - stripe_start] = rmm::device_buffer( - cudf::util::round_up_safe(stripe_sizes[stripe_idx], BUFFER_PADDING_MULTIPLE), _stream); + for (std::size_t idx = 0; idx < stripe_count; ++idx) { + auto const stripe_size = _file_itm_data.lvl_stripe_sizes[level][idx + stripe_start]; + stripe_data[idx] = rmm::device_buffer( + cudf::util::round_up_safe(stripe_size, BUFFER_PADDING_MULTIPLE), _stream); } } + // Load stripe data into memory. std::vector> host_read_buffers; std::vector, std::size_t>> read_tasks; - - auto const& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; - auto const [read_begin, read_end] = get_range(stripe_data_read_chunks, stripe_chunk); + auto const [read_begin, read_end] = + get_range(_file_itm_data.stripe_data_read_ranges, load_stripe_range); for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { - auto const& read = read_info[read_idx]; - auto& stripe_data = lvl_stripe_data[read.level]; - auto dst_base = static_cast(stripe_data[read.stripe_idx - stripe_start].data()); + auto const& read_info = _file_itm_data.data_read_info[read_idx]; + auto const source = _metadata.per_file_metadata[read_info.source_idx].source; + auto const dst_base = static_cast( + lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); - if (_metadata.per_file_metadata[read.source_idx].source->is_device_read_preferred( - read.length)) { + if (source->is_device_read_preferred(read_info.length)) { read_tasks.push_back( - std::pair(_metadata.per_file_metadata[read.source_idx].source->device_read_async( - read.offset, read.length, dst_base + read.dst_pos, _stream), - read.length)); + std::pair(source->device_read_async( + read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), + read_info.length)); } else { - auto buffer = - _metadata.per_file_metadata[read.source_idx].source->host_read(read.offset, read.length); - CUDF_EXPECTS(buffer->size() == read.length, "Unexpected discrepancy in bytes read."); - CUDF_CUDA_TRY(cudaMemcpyAsync( - dst_base + read.dst_pos, buffer->data(), read.length, cudaMemcpyDefault, _stream.value())); - // _stream.synchronize(); + auto buffer = source->host_read(read_info.offset, read_info.length); + CUDF_EXPECTS(buffer->size() == read_info.length, "Unexpected discrepancy in bytes read."); + CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read_info.dst_pos, + buffer->data(), + read_info.length, + cudaMemcpyDefault, + _stream.value())); host_read_buffers.emplace_back(std::move(buffer)); } } @@ -537,8 +533,6 @@ void reader::impl::load_data() CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_ranges; - // TODO: This is subpass // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. 
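// The per-stripe buffers above are padded to a multiple of BUFFER_PADDING_MULTIPLE; the
// rounding itself is plain integer arithmetic (multiple is assumed to be nonzero):
#include <cstddef>

inline std::size_t round_up(std::size_t value, std::size_t multiple)
{
  return ((value + multiple - 1) / multiple) * multiple;  // e.g. round_up(1001, 8) == 1008
}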
stream_source_map stream_compinfo_map; @@ -546,7 +540,7 @@ void reader::impl::load_data() cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); for (std::size_t stripe_idx = 0; stripe_idx < stripe_count; ++stripe_idx) { - auto const& stripe = selected_stripes[stripe_idx]; + auto const& stripe = _file_itm_data.selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; stripe_decomp_sizes[stripe_idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; @@ -564,9 +558,9 @@ void reader::impl::load_data() auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } - auto const& stripe_stream_chunks = lvl_stripe_stream_chunks[level]; - auto const [stream_begin, stream_end] = get_range(stripe_stream_chunks, stripe_chunk); - auto const num_streams = stream_end - stream_begin; + auto const [stream_begin, stream_end] = + get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); + auto const num_streams = stream_end - stream_begin; // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { @@ -670,7 +664,7 @@ void reader::impl::load_data() printf("0 limit: output decode stripe chunk unchanged\n"); #endif - _chunk_read_data.decode_stripe_ranges = {stripe_chunk}; + _chunk_read_data.decode_stripe_ranges = {load_stripe_range}; return; } From 17c3393729e82ced96df0e1364d156a0765fb0d2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 10 Mar 2024 10:16:11 -0700 Subject: [PATCH 211/321] Continue cleaning up Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 44 ++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 44eb8b4b339..395fc345160 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -299,8 +299,8 @@ void reader::impl::global_preprocess(read_mode mode) // // Pre allocate necessary memory for data processed in the next steps: // - auto& stripe_data_read_chunks = _file_itm_data.stripe_data_read_ranges; - stripe_data_read_chunks.resize(num_total_stripes); + auto& stripe_data_read_ranges = _file_itm_data.stripe_data_read_ranges; + stripe_data_read_ranges.resize(num_total_stripes); auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; @@ -343,6 +343,8 @@ void reader::impl::global_preprocess(read_mode mode) // // Load all stripes' metadata. // + + // Collect total data size for all data streams in each stripe. cudf::detail::hostdevice_vector total_stripe_sizes(num_total_stripes, _stream); for (std::size_t stripe_global_idx = 0; stripe_global_idx < num_total_stripes; @@ -399,7 +401,7 @@ void reader::impl::global_preprocess(read_mode mode) } } total_stripe_sizes[stripe_global_idx] = {1, stripe_size}; - stripe_data_read_chunks[stripe_global_idx] = range{last_read_size, read_info.size()}; + stripe_data_read_ranges[stripe_global_idx] = range{last_read_size, read_info.size()}; } // @@ -498,9 +500,18 @@ void reader::impl::load_data() } } - // Load stripe data into memory. + // + // Load stripe data into memory: + // + + // After loading data from sources into host buffers, we need to transfer (async) data to device. + // Such host buffers need to be kept alive until we sync device. std::vector> host_read_buffers; + + // If we load data directly from sources into device, we also need to the entire read tasks. 
+ // Thus, we need to keep all read tasks alive and sync all together. std::vector, std::size_t>> read_tasks; + auto const [read_begin, read_end] = get_range(_file_itm_data.stripe_data_read_ranges, load_stripe_range); @@ -533,28 +544,29 @@ void reader::impl::load_data() CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } - // TODO: This is subpass - // TODO: Don't have to keep it for all stripe/level. Can reset it after each iter. + // + // Split list of all stripes into subsets that be loaded separately without blowing up memory: + // + + // A map from stripe source into `CompressedStreamInfo*` which are generated during parsing + // streams decompressed sizes. + // These pointers are then used to populate stripe/level decompressed sizes for later + // decompression and decoding. stream_source_map stream_compinfo_map; + // For estimating the decompressed sizes of the loaded stripes. cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); for (std::size_t stripe_idx = 0; stripe_idx < stripe_count; ++stripe_idx) { - auto const& stripe = _file_itm_data.selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - + auto const& stripe = _file_itm_data.selected_stripes[stripe_idx]; + auto const stripe_info = stripe.stripe_info; stripe_decomp_sizes[stripe_idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; - // printf("loading stripe with rows = %d\n", (int)stripe_info->numberOfRows); } - // std::fill( - // stripe_decomp_sizes.begin(), stripe_decomp_sizes.end(), cumulative_size_and_row{1, 0, 0}); - // Parse the decompressed sizes for each stripe. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { - auto& stream_info = _file_itm_data.lvl_stream_info[level]; - auto const num_columns = _selected_columns.levels[level].size(); + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; + auto const num_columns = _selected_columns.levels[level].size(); - // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } From 86e429f474fa39ddc858364bf5dfb98c850498f4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 10 Mar 2024 19:29:41 -0700 Subject: [PATCH 212/321] Cleanup and add docs Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 72 ++++++++++++-------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 395fc345160..982fae2efb8 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -548,21 +548,24 @@ void reader::impl::load_data() // Split list of all stripes into subsets that be loaded separately without blowing up memory: // - // A map from stripe source into `CompressedStreamInfo*` which are generated during parsing - // streams decompressed sizes. - // These pointers are then used to populate stripe/level decompressed sizes for later + // A map from stripe source into `CompressedStreamInfo*` pointer. + // These pointers are then used to retrieve stripe/level decompressed sizes for later // decompression and decoding. stream_source_map stream_compinfo_map; // For estimating the decompressed sizes of the loaded stripes. 
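// The keep-alive-and-sync pattern above in miniature: each async read hands back a
// future paired with its expected byte count, and every future must be drained and
// validated before the destination buffers are used.
#include <cstddef>
#include <future>
#include <stdexcept>
#include <utility>
#include <vector>

inline void wait_all(std::vector<std::pair<std::future<std::size_t>, std::size_t>>& tasks)
{
  for (auto& task : tasks) {
    if (task.first.get() != task.second) { throw std::runtime_error("short read"); }
  }
}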
cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); + std::size_t num_loaded_stripes{0}; for (std::size_t stripe_idx = 0; stripe_idx < stripe_count; ++stripe_idx) { auto const& stripe = _file_itm_data.selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; stripe_decomp_sizes[stripe_idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; + num_loaded_stripes += stripe_info->numberOfRows; } + auto& compinfo_map = _file_itm_data.compinfo_map; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -574,26 +577,23 @@ void reader::impl::load_data() get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); auto const num_streams = stream_end - stream_begin; - // Setup row group descriptors if using indexes if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - // Cannot be cached, since this is for streams in a loaded stripe chunk, while - // the latter decoding step will use a different stripe chunk. + // Cannot be cached as-is, since this is for streams in a loaded stripe range, while + // the latter decompression/decoding step will use a different stripe range. cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); - // TODO: Instead of all stream info, loop using read_chunk info to process - // only stream info of the curr_load_stripe_chunk. - for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()) + - info.dst_pos, - info.length)); + auto const dst_base = + static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); + + compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); stream_compinfo_map[stream_source_info{ info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = &compinfo.back(); + #ifdef LOCAL_TEST printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", (int)info.source.stripe_idx, @@ -607,7 +607,6 @@ void reader::impl::load_data() } compinfo.host_to_device_async(_stream); - gpu::ParseCompressedStripeData(compinfo.device_ptr(), compinfo.size(), decompressor.GetBlockSize(), @@ -615,9 +614,8 @@ void reader::impl::load_data() _stream); compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data.compinfo_map; for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { - // Cache these parsed numbers so they can be reused in the decoding step. + // Cache these parsed numbers so they can be reused in the decompression/decoding step. compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, stream_compinfo->num_uncompressed_blocks, stream_compinfo->max_uncompressed_size}; @@ -637,7 +635,7 @@ void reader::impl::load_data() #endif } - // Must clear map since the next level will have similar keys. + // Important: must clear this map since the next level will have similar keys. stream_compinfo_map.clear(); } else { @@ -657,21 +655,18 @@ void reader::impl::load_data() } // end loop level - // Decoding is reset to start from the first chunk in `decode_stripe_chunks`. + // Decoding range is reset to start from the first position in `decode_stripe_ranges`. 
_chunk_read_data.curr_decode_stripe_range = 0; - // Decode all chunks if there is no read and no output limit. - // In theory, we should just decode enough stripes for output one table chunk. - // However, we do not know the output size of each stripe after decompressing and decoding, - // thus we have to process all loaded chunks. - // That is because the estimated `max_uncompressed_size` of stream data from - // `ParseCompressedStripeData` is just the approximate of the maximum possible size, not the - // actual size, which can be much smaller in practice. - - // TODO: docs on handle size overflow + // Decode all loaded stripes if there is no read limit. + // In theory, we should just decode enough stripes for output one table chunk, instead of + // decoding all stripes like this. + // However, we do not know how many stripes are 'enough' because there is not any simple and + // cheap way to compute the exact decoded sizes of stripes. if (_chunk_read_data.data_read_limit == 0 && - // TODO: rows_to_read is changed every decode, should we change this? - _file_itm_data.rows_to_read < static_cast(std::numeric_limits::max())) { + // In addition to not have any read limit, we also need to check if the the total number of + // rows in the loaded stripes exceeds column size limit. + num_loaded_stripes < static_cast(std::numeric_limits::max())) { #ifdef LOCAL_TEST printf("0 limit: output decode stripe chunk unchanged\n"); #endif @@ -693,14 +688,13 @@ void reader::impl::load_data() } #endif - // Compute the prefix sum of stripe data sizes. + // Compute the prefix sum of stripe data sizes and rows. stripe_decomp_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy(_stream), stripe_decomp_sizes.d_begin(), stripe_decomp_sizes.d_end(), stripe_decomp_sizes.d_begin(), cumulative_size_sum{}); - stripe_decomp_sizes.device_to_host_sync(_stream); #ifdef LOCAL_TEST @@ -715,9 +709,8 @@ void reader::impl::load_data() #endif auto const decode_limit = [&] { - // In this case, we have no read limit but have to split due to having large input in which - // the number of rows exceed column size limit. - // We will split based on row number, not data size. + // In this case, we have no read limit but have to split due to having number of rows in loaded + // stripes exceeds column size limit. So we will split based on row number, not data size. if (_chunk_read_data.data_read_limit == 0) { return std::numeric_limits::max(); } // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits. @@ -727,6 +720,10 @@ void reader::impl::load_data() }(); _chunk_read_data.decode_stripe_ranges = find_splits(stripe_decomp_sizes, stripe_count, decode_limit); + + // The split ranges always start from zero. + // We need to update the ranges to start from `stripe_start` which is covererd by the current + // range of loaded stripes. 
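// Host-side analogue of the device scan used above: an inclusive prefix sum over
// (count, size, rows) triples with a custom associative sum (std::inclusive_scan here
// stands in for thrust::inclusive_scan with cumulative_size_sum).
#include <cstddef>
#include <numeric>
#include <vector>

struct csr { std::size_t count; std::size_t size_bytes; std::size_t rows; };

inline void prefix_sum(std::vector<csr>& v)
{
  std::inclusive_scan(v.begin(), v.end(), v.begin(), [](csr const& a, csr const& b) {
    return csr{a.count + b.count, a.size_bytes + b.size_bytes, a.rows + b.rows};
  });
}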
for (auto& chunk : _chunk_read_data.decode_stripe_ranges) { chunk.begin += stripe_start; chunk.end += stripe_start; @@ -738,16 +735,11 @@ void reader::impl::load_data() for (size_t idx = 0; idx < splits.size(); idx++) { printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); } - fflush(stdout); -#endif - // lvl_stripe_data.clear(); - // _file_itm_data.compinfo_ready = true; - -#ifdef LOCAL_TEST auto peak_mem = mem_stats_logger.peak_memory_usage(); std::cout << "load, peak_memory_usage: " << peak_mem << "(" << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; + fflush(stdout); #endif } From c50071959198cbaabc46df6d9e03ec622c2f12b5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 10 Mar 2024 22:08:28 -0700 Subject: [PATCH 213/321] Rename variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 8 ++++---- cpp/src/io/orc/reader_impl_decode.cu | 28 +++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 982fae2efb8..d22fc9e30e6 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -305,12 +305,12 @@ void reader::impl::global_preprocess(read_mode mode) auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; - auto& lvl_stripe_stream_chunks = _file_itm_data.lvl_stripe_stream_ranges; + auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); lvl_stream_info.resize(num_levels); - lvl_stripe_stream_chunks.resize(num_levels); + lvl_stripe_stream_ranges.resize(num_levels); _file_itm_data.lvl_data_chunks.resize(num_levels); _out_buffers.resize(num_levels); @@ -319,7 +319,7 @@ void reader::impl::global_preprocess(read_mode mode) for (std::size_t level = 0; level < num_levels; ++level) { lvl_stripe_sizes[level].resize(num_total_stripes); - lvl_stripe_stream_chunks[level].resize(num_total_stripes); + lvl_stripe_stream_ranges[level].resize(num_total_stripes); // Association between each ORC column and its cudf::column col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); @@ -382,7 +382,7 @@ void reader::impl::global_preprocess(read_mode mode) stripe_sizes[stripe_global_idx] = stripe_level_size; stripe_size += stripe_level_size; - lvl_stripe_stream_chunks[level][stripe_global_idx] = + lvl_stripe_stream_ranges[level][stripe_global_idx] = range{stream_level_count, stream_info.size()}; // Coalesce consecutive streams into one read diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 87fdc40b351..441d0b507f6 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -78,8 +78,8 @@ namespace { * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( - range const& load_stripe_range, - range const& stripe_range, + range const& loaded_stripe_range, + range const& decode_stripe_range, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -96,12 +96,11 @@ rmm::device_buffer decompress_stripe_data( std::size_t num_uncompressed_blocks = 0; std::size_t total_decomp_size = 0; - // printf("decompress #stripe: %d, ") - - // TODO: use lvl_stripe_stream_chunks + // TODO: use lvl_stripe_stream_ranges std::size_t count{0}; for (auto const& 
info : stream_info) { - if (info.source.stripe_idx < stripe_range.begin || info.source.stripe_idx >= stripe_range.end) { + if (info.source.stripe_idx < decode_stripe_range.begin || + info.source.stripe_idx >= decode_stripe_range.end) { continue; } count++; @@ -110,7 +109,8 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_vector compinfo(0, count, stream); for (auto const& info : stream_info) { - if (info.source.stripe_idx < stripe_range.begin || info.source.stripe_idx >= stripe_range.end) { + if (info.source.stripe_idx < decode_stripe_range.begin || + info.source.stripe_idx >= decode_stripe_range.end) { continue; } @@ -127,7 +127,7 @@ rmm::device_buffer decompress_stripe_data( compinfo.push_back(gpu::CompressedStreamInfo( static_cast( - stripe_data[info.source.stripe_idx - load_stripe_range.begin].data()) + + stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + info.dst_pos, info.length)); @@ -822,11 +822,11 @@ void reader::impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } - auto const stripe_chunk = + auto const stripe_range = _chunk_read_data.decode_stripe_ranges[_chunk_read_data.curr_decode_stripe_range++]; - auto const stripe_start = stripe_chunk.begin; - auto const stripe_end = stripe_chunk.end; - auto const stripe_count = stripe_chunk.end - stripe_chunk.begin; + auto const stripe_start = stripe_range.begin; + auto const stripe_end = stripe_range.end; + auto const stripe_count = stripe_range.end - stripe_range.begin; auto const load_stripe_start = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1].begin; @@ -980,7 +980,7 @@ void reader::impl::decompress_and_decode() #endif auto const& stripe_stream_ranges = lvl_stripe_stream_ranges[level]; - auto const [stream_begin, stream_end] = get_range(stripe_stream_ranges, stripe_chunk); + auto const [stream_begin, stream_end] = get_range(stripe_stream_ranges, stripe_range); auto& columns_level = _selected_columns.levels[level]; @@ -1245,7 +1245,7 @@ void reader::impl::decompress_and_decode() auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1], - stripe_chunk, + stripe_range, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, From a03cb3de2adf190d80260362551e969f8e109677 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 10 Mar 2024 22:22:34 -0700 Subject: [PATCH 214/321] Change return type of `get_range` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 11 ++++---- cpp/src/io/orc/reader_impl_chunking.hpp | 3 +- cpp/src/io/orc/reader_impl_decode.cu | 37 +++++++------------------ 3 files changed, 16 insertions(+), 35 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index d22fc9e30e6..92d0f2233db 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -248,8 +248,7 @@ template std::vector find_splits(host_span find_splits( host_span sizes, std::size_t total_count, std::size_t size_limit); -std::pair get_range(std::vector const& input_ranges, - range const& selected_ranges) +range get_range(std::vector const& input_ranges, range const& selected_ranges) { // The first and last range. 
auto const& first_range = input_ranges[selected_ranges.begin]; @@ -573,9 +572,9 @@ void reader::impl::load_data() auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } - auto const [stream_begin, stream_end] = + auto const stream_range = get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); - auto const num_streams = stream_end - stream_begin; + auto const num_streams = stream_range.end - stream_range.begin; if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; @@ -584,7 +583,7 @@ void reader::impl::load_data() // the latter decompression/decoding step will use a different stripe range. cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); - for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { + for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; auto const dst_base = static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); @@ -645,7 +644,7 @@ void reader::impl::load_data() #endif // Set decompression size equal to the input size. - for (auto stream_idx = stream_begin; stream_idx < stream_end; ++stream_idx) { + for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; stripe_decomp_sizes[info.source.stripe_idx - stripe_start].size_bytes += info.length; } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index ba3611d5757..a3883426787 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -288,8 +288,7 @@ std::vector find_splits(host_span cumulative_sizes, * @param selected_ranges A range of ranges from `input_ranges` * @return The range of data span by the selected range of ranges */ -std::pair get_range(std::vector const& input_ranges, - range const& selected_ranges); +range get_range(std::vector const& input_ranges, range const& selected_ranges); /** * @brief Function that populates descriptors for either individual streams or chunks of column diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 441d0b507f6..10f2192b019 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -79,7 +79,7 @@ namespace { */ rmm::device_buffer decompress_stripe_data( range const& loaded_stripe_range, - range const& decode_stripe_range, + range const& stream_range, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -96,23 +96,11 @@ rmm::device_buffer decompress_stripe_data( std::size_t num_uncompressed_blocks = 0; std::size_t total_decomp_size = 0; - // TODO: use lvl_stripe_stream_ranges - std::size_t count{0}; - for (auto const& info : stream_info) { - if (info.source.stripe_idx < decode_stripe_range.begin || - info.source.stripe_idx >= decode_stripe_range.end) { - continue; - } - count++; - } + auto const num_streams = stream_range.end - stream_range.begin; + cudf::detail::hostdevice_vector compinfo(0, num_streams, stream); - cudf::detail::hostdevice_vector compinfo(0, count, stream); - - for (auto const& info : stream_info) { - if (info.source.stripe_idx < decode_stripe_range.begin || - info.source.stripe_idx >= decode_stripe_range.end) { - continue; - } + for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) 
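// Self-contained sketch of the simplified get_range contract above: a range of ranges is
// flattened into one contiguous range over the underlying data. For instance, with
// per-stripe stream ranges {0,3}, {3,7}, {7,9}, selecting stripes {1,3} yields {3,9}.
#include <cstddef>
#include <vector>

struct rrange { std::size_t begin; std::size_t end; };

inline rrange get_range_sketch(std::vector<rrange> const& input_ranges, rrange selected)
{
  return rrange{input_ranges[selected.begin].begin, input_ranges[selected.end - 1].end};
}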
{ + auto const& info = stream_info[stream_idx]; #ifdef LOCAL_TEST // printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n", @@ -979,8 +967,8 @@ void reader::impl::decompress_and_decode() } #endif - auto const& stripe_stream_ranges = lvl_stripe_stream_ranges[level]; - auto const [stream_begin, stream_end] = get_range(stripe_stream_ranges, stripe_range); + auto const& stripe_stream_ranges = lvl_stripe_stream_ranges[level]; + auto const stream_range = get_range(stripe_stream_ranges, stripe_range); auto& columns_level = _selected_columns.levels[level]; @@ -1179,13 +1167,8 @@ void reader::impl::decompress_and_decode() } if (not is_stripe_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k] + stream_begin].dst_pos; - // printf("chunk.streams[%d] of chunk.strm_id[%d], stripe %d | %d, collect from %d\n", - // (int)k, - // (int)chunk.strm_id[k], - // (int)stripe_idx, - // (int)stripe_start, - // (int)(chunk.strm_id[k] + stream_begin)); + chunk.streams[k] = + dst_base + stream_info[chunk.strm_id[k] + stream_range.begin].dst_pos; } } } @@ -1245,7 +1228,7 @@ void reader::impl::decompress_and_decode() auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1], - stripe_range, + get_range(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range), _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, From cebb051b9cc61b655dfd2f68b23e61552fc68414 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 10 Mar 2024 22:33:57 -0700 Subject: [PATCH 215/321] More cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 6 ++--- cpp/src/io/orc/reader_impl_decode.cu | 31 +++++++++++--------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 92d0f2233db..2c15e977c28 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -723,9 +723,9 @@ void reader::impl::load_data() // The split ranges always start from zero. // We need to update the ranges to start from `stripe_start` which is covererd by the current // range of loaded stripes. 
- for (auto& chunk : _chunk_read_data.decode_stripe_ranges) { - chunk.begin += stripe_start; - chunk.end += stripe_start; + for (auto& range : _chunk_read_data.decode_stripe_ranges) { + range.begin += stripe_start; + range.end += stripe_start; } #ifdef LOCAL_TEST diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 10f2192b019..629494569fc 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -71,14 +71,13 @@ namespace { * @param stream_info List of stream to column mappings * @param chunks Vector of list of column chunk descriptors * @param row_groups Vector of list of row index descriptors - * @param num_stripes Number of stripes making up column chunks * @param row_index_stride Distance between each row index * @param use_base_stride Whether to use base stride obtained from meta or use the computed value * @param stream CUDA stream used for device memory operations and kernel launches * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( - range const& loaded_stripe_range, + range const& stripe_range, range const& stream_range, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, @@ -86,7 +85,6 @@ rmm::device_buffer decompress_stripe_data( host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, cudf::detail::hostdevice_2dvector& row_groups, - size_type num_stripes, size_type row_index_stride, bool use_base_stride, rmm::cuda_stream_view stream) @@ -114,21 +112,14 @@ rmm::device_buffer decompress_stripe_data( #endif compinfo.push_back(gpu::CompressedStreamInfo( - static_cast( - stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + + static_cast(stripe_data[info.source.stripe_idx - stripe_range.begin].data()) + info.dst_pos, info.length)); - // printf("line %d\n", __LINE__); - // fflush(stdout); auto const& cached_comp_info = compinfo_map.at(stream_source_info{ info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}); - // printf("line %d\n", __LINE__); - // fflush(stdout); - // auto const& cached_comp_info = - // compinfo_map[stream_id_info{info.source.stripe_idx, info.source.level, - // info.source.orc_cold_idx, info.source.kind}]; - auto& stream_comp_info = compinfo.back(); + auto& stream_comp_info = compinfo.back(); + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; @@ -186,7 +177,10 @@ rmm::device_buffer decompress_stripe_data( // Required by `gpuDecodeOrcColumnData`. rmm::device_buffer decomp_data( cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); - if (decomp_data.is_empty()) { return decomp_data; } + + // If total_decomp_size is zero, the data should not be compressed, and this function + // should not be called at all. 
+ CUDF_EXPECTS(!decomp_data.is_empty(), "Invalid decompression size"); rmm::device_uvector> inflate_in( num_compressed_blocks + num_uncompressed_blocks, stream); @@ -325,15 +319,16 @@ rmm::device_buffer decompress_stripe_data( // We can check on host after stream synchronize CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); - auto const num_columns = static_cast(chunks.size().second); + auto const num_stripes = stripe_range.end - stripe_range.begin; + auto const num_columns = chunks.size().second; // Update the stream information with the updated uncompressed info // TBD: We could update the value from the information we already // have in stream_info[], but using the gpu results also updates // max_uncompressed_size to the actual uncompressed size, or zero if // decompression failed. - for (size_type i = 0; i < num_stripes; ++i) { - for (size_type j = 0; j < num_columns; ++j) { + for (std::size_t i = 0; i < num_stripes; ++i) { + for (std::size_t j = 0; j < num_columns; ++j) { auto& chunk = chunks[i][j]; for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { @@ -821,6 +816,7 @@ void reader::impl::decompress_and_decode() #ifdef LOCAL_TEST printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); + printf("\n loaded stripe start %d \n", (int)load_stripe_start); #endif auto const rows_to_skip = _file_itm_data.rows_to_skip; @@ -1235,7 +1231,6 @@ void reader::impl::decompress_and_decode() stream_info, chunks, row_groups, - stripe_count, _metadata.get_row_index_stride(), level == 0, _stream); From a0492fde02469c52bee328306684eb9a57b8a54c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 09:22:36 -0700 Subject: [PATCH 216/321] Fix num stripes Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 629494569fc..22dc684588a 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -77,8 +77,9 @@ namespace { * @return Device buffer to decompressed page data */ rmm::device_buffer decompress_stripe_data( - range const& stripe_range, + range const& loaded_stripe_range, range const& stream_range, + std::size_t num_decode_stripes, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -112,7 +113,8 @@ rmm::device_buffer decompress_stripe_data( #endif compinfo.push_back(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.source.stripe_idx - stripe_range.begin].data()) + + static_cast( + stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + info.dst_pos, info.length)); @@ -319,7 +321,6 @@ rmm::device_buffer decompress_stripe_data( // We can check on host after stream synchronize CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); - auto const num_stripes = stripe_range.end - stripe_range.begin; auto const num_columns = chunks.size().second; // Update the stream information with the updated uncompressed info @@ -327,7 +328,7 @@ rmm::device_buffer decompress_stripe_data( // have in stream_info[], but using the gpu results also updates // max_uncompressed_size to the actual uncompressed size, or zero if // decompression failed. 
- for (std::size_t i = 0; i < num_stripes; ++i) { + for (std::size_t i = 0; i < num_decode_stripes; ++i) { for (std::size_t j = 0; j < num_columns; ++j) { auto& chunk = chunks[i][j]; for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { @@ -346,7 +347,7 @@ rmm::device_buffer decompress_stripe_data( compinfo.device_ptr(), chunks.base_device_ptr(), num_columns, - num_stripes, + num_decode_stripes, row_index_stride, use_base_stride, stream); @@ -1225,6 +1226,7 @@ void reader::impl::decompress_and_decode() auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1], get_range(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range), + stripe_count, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, From d2e892d7bc9198ef0cd317628e1d08d7695a616c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 09:31:45 -0700 Subject: [PATCH 217/321] Update docs Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 22dc684588a..04b48742f64 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -61,11 +61,17 @@ namespace cudf::io::orc::detail { namespace { -// TODO: update -// TODO: compute num stripes from chunks /** - * @brief Decompresses the stripe data, at stream granularity. + * @brief Decompresses the stripe data, at stream granularity. * + * Only the streams in the provided `stream_range` are decoded. That range is determined in + * the previous steps, after splitting stripes into subsets to maintain memory usage to be + * under data read limit. + * + * @param loaded_stripe_range Range of stripes that are already loaded in memory + * @param stream_range Range of streams to be decoded + * @param num_decoded_stripes Number of stripes that the decoding streams belong to + * @param compinfo_map A map to lookup compression info of streams * @param decompressor Block decompressor * @param stripe_data List of source stripe column data * @param stream_info List of stream to column mappings @@ -74,7 +80,7 @@ namespace { * @param row_index_stride Distance between each row index * @param use_base_stride Whether to use base stride obtained from meta or use the computed value * @param stream CUDA stream used for device memory operations and kernel launches - * @return Device buffer to decompressed page data + * @return Device buffer to decompressed data */ rmm::device_buffer decompress_stripe_data( range const& loaded_stripe_range, @@ -208,13 +214,13 @@ rmm::device_buffer decompress_stripe_data( compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; - // stream_info[i].dst_pos = decomp_offset; decomp_offset += compinfo[i].max_uncompressed_size; start_pos += compinfo[i].num_compressed_blocks; start_pos_uncomp += compinfo[i].num_uncompressed_blocks; max_uncomp_block_size = std::max(max_uncomp_block_size, compinfo[i].max_uncompressed_block_size); } + compinfo.host_to_device_async(stream); gpu::ParseCompressedStripeData(compinfo.device_ptr(), compinfo.size(), @@ -290,8 +296,6 @@ rmm::device_buffer decompress_stripe_data( default: CUDF_FAIL("Unexpected decompression dispatch"); break; } - // TODO: proclam return type - // Check if any block has been failed to decompress. 
// Not using `thrust::any` or `thrust::count_if` to defer stream sync.
   thrust::for_each(

From 5e4b16f7c9cabadd25311d0fa43174a63cb14be0 Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Mon, 11 Mar 2024 10:59:10 -0700
Subject: [PATCH 218/321] Cleanup and add docs

Signed-off-by: Nghia Truong 
---
 cpp/src/io/orc/reader_impl_decode.cu | 35 +++++++++-------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 04b48742f64..13c0ab1f637 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -597,12 +597,10 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const&
   stream.synchronize();
 }
 
-// TODO: this is called for each chunk of stripes.
 /**
  * @brief Aggregate child metadata from parent column chunks.
  */
-void aggregate_child_meta(std::size_t stripe_start,
-                          std::size_t level,
+void aggregate_child_meta(std::size_t level,
                           cudf::io::orc::detail::column_hierarchy const& selected_columns,
                           cudf::detail::host_2dspan chunks,
                           cudf::detail::host_2dspan row_groups,
@@ -637,10 +635,7 @@ void aggregate_child_meta(std::size_t stripe_start,
 
   // For each parent column, update its child column meta for each stripe.
   std::for_each(nested_cols.begin(), nested_cols.end(), [&](auto const p_col) {
-    // printf("p_col.id: %d\n", (int)p_col.id);
-
     auto const parent_col_idx = col_meta.orc_col_map[level][p_col.id];
-    // printf("  level: %d, parent_col_idx: %d\n", (int)level, (int)parent_col_idx);
 
     int64_t start_row         = 0;
     auto processed_row_groups = 0;
@@ -648,8 +643,6 @@ void aggregate_child_meta(std::size_t stripe_start,
     for (std::size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) {
       // Aggregate num_rows and start_row from processed parent columns per row groups
       if (num_of_rowgroups) {
-        // printf("   num_of_rowgroups: %d\n", (int)num_of_rowgroups);
-
        auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups;
        auto processed_child_rows  = 0;
 
@@ -667,24 +660,24 @@ void aggregate_child_meta(std::size_t stripe_start,
 
      // Aggregate start row, number of rows per chunk and total number of rows in a column
      auto const child_rows = chunks[stripe_id][parent_col_idx].num_child_rows;
-      // printf("  stripe_id: %d: child_rows: %d\n", (int)stripe_id, (int)child_rows);
-      // printf("  p_col.num_children: %d\n", (int)p_col.num_children);
 
      for (size_type id = 0; id < p_col.num_children; id++) {
        auto const child_col_idx = index + id;
 
-        // TODO: Check for overflow here.
        num_child_rows[child_col_idx] += child_rows;
+
+          // The number of rows in a child column should not exceed the column size limit,
+          // otherwise we will have size overflow.
+          // If that happens, we need to set a read limit to reduce the number of decoding stripes.
+          CUDF_EXPECTS(num_child_rows[child_col_idx] <=
+                         static_cast(std::numeric_limits::max()),
+                       "Number of rows in the child column exceeds column size limit.");
+
           num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows;
          // start row could be different for each column when there is nesting at each stripe level
          child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ?
0 : start_row;
-          // printf("update child_start_row (%d, %d): %d\n",
-          //        (int)stripe_id,
-          //        (int)child_col_idx,
-          //        (int)start_row);
         }
         start_row += child_rows;
-        // printf("    start_row: %d\n", (int)start_row);
       }
 
       // Parent column null mask and null count would be required for child column

From 3ec50ef9999106dd06e540815c39787cd824cb9f Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Mon, 11 Mar 2024 12:22:25 -0700
Subject: [PATCH 219/321] Cleanup, docs, and rename variables

Signed-off-by: Nghia Truong 
---
 cpp/src/io/orc/reader_impl_chunking.cu |  2 +
 cpp/src/io/orc/reader_impl_decode.cu   | 77 ++++++++++++++------------
 2 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 2c15e977c28..9ea03fd2e56 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -429,6 +429,7 @@ void reader::impl::global_preprocess(read_mode mode)
   }
 #endif
 
+  // TODO: exec_policy_nosync
   // Compute the prefix sum of stripes' data sizes.
   total_stripe_sizes.host_to_device_async(_stream);
   thrust::inclusive_scan(rmm::exec_policy(_stream),  // todo no sync
@@ -687,6 +688,7 @@ void reader::impl::load_data()
   }
 #endif
 
+  // TODO: exec_policy_nosync
   // Compute the prefix sum of stripe data sizes and rows.
   stripe_decomp_sizes.host_to_device_async(_stream);
   thrust::inclusive_scan(rmm::exec_policy(_stream),
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 13c0ab1f637..e85389e4c9a 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -576,6 +576,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const&
   auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async(
     prefix_sums_to_update, stream, rmm::mr::get_current_device_resource());
 
+  // TODO: exec_policy_nosync
   thrust::for_each(rmm::exec_policy(stream),
                    d_prefix_sums_to_update.begin(),
                    d_prefix_sums_to_update.end(),
@@ -722,11 +723,20 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_
 }
 
 /**
- * @brief TODO
- * @param input
- * @param size_limit
- * @param stream
- * @return
+ * @brief Find the splits of the input table such that each split range has cumulative size less
+ * than a given `size_limit`.
+ *
+ * The parameter `segment_length` controls the granularity of the splits. The output ranges will
+ * always have row counts that are multiples of this value, except the last range, which contains
+ * the remaining rows.
+ *
+ * Similar to `find_splits`, the given limit is just a soft limit. The function will never output
+ * empty ranges, even when their sizes exceed the value of `size_limit`.
+ * + * @param input The input table to find splits + * @param size_limit A limit on the output size of each split range + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A vector of ranges as splits of the input */ std::vector find_table_splits(table_view const& input, size_type segment_length, @@ -738,14 +748,12 @@ std::vector find_table_splits(table_view const& input, #endif // If segment_length is zero: we don't have any limit on granularity. - // As such, set segment length to the number of rows. + // As such, set segment length equal to the number of rows. if (segment_length == 0) { segment_length = input.num_rows(); } - // If we have small number of rows, need to adjust segment_length before calling to - // `segmented_row_bit_count`. + // `segmented_row_bit_count` requires that `segment_length` is not larger than number of rows. segment_length = std::min(segment_length, input.num_rows()); - // Default 10k rows. auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( input, segment_length, stream, rmm::mr::get_current_device_resource()); @@ -798,7 +806,6 @@ std::vector find_table_splits(table_view const& input, } // namespace -// TODO: this should be called per chunk of stripes. void reader::impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } @@ -809,6 +816,7 @@ void reader::impl::decompress_and_decode() auto const stripe_end = stripe_range.end; auto const stripe_count = stripe_range.end - stripe_range.begin; + // The start index of loaded stripes. They are different from decoding stripes. auto const load_stripe_start = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1].begin; @@ -817,40 +825,36 @@ void reader::impl::decompress_and_decode() printf("\n loaded stripe start %d \n", (int)load_stripe_start); #endif - auto const rows_to_skip = _file_itm_data.rows_to_skip; - // auto const rows_to_read = _file_itm_data.rows_to_read; + auto const rows_to_skip = _file_itm_data.rows_to_skip; auto const& selected_stripes = _file_itm_data.selected_stripes; - // auto const rows_to_skip = 0; - int64_t rows_to_read = 0; + // Number of rows to decode in this decompressing/decoding step. + int64_t rows_to_decode = 0; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { auto const& stripe = selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - // TODO: this is indeed not needed since we split stripes before this based on stripe row - - // TODO: check overflow - // CUDF_EXPECTS(per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows < - // static_cast(std::numeric_limits::max()), - // "TODO"); - rows_to_read += static_cast(stripe_info->numberOfRows); - - if (_file_itm_data.rows_to_skip > 0) { - CUDF_EXPECTS(_file_itm_data.rows_to_skip < static_cast(stripe_info->numberOfRows), - "TODO"); + auto const stripe_rows = static_cast(stripe.stripe_info->numberOfRows); + rows_to_decode += stripe_rows; + + // The rows to skip should never be larger than number of rows in the first loaded stripes. + // This is just to make sure there was not any bug with it. 
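// To make that invariant concrete -- hypothetical row counts, for illustration only:
// with selected stripes holding 1000, 800, and 1200 rows, any valid rows_to_skip must
// land inside the first stripe:
//
//   int64_t const stripe_rows[]{1000, 800, 1200};
//   int64_t const rows_to_skip = 250;  // valid: 250 < stripe_rows[0]
//   // rows_to_skip >= 1000 would mean stripe 0 should not have been selected at all.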
+    if (rows_to_skip > 0) {
+      CUDF_EXPECTS(rows_to_skip < stripe_rows, "Invalid rows_to_skip computation.");
     }
   }
-  CUDF_EXPECTS(rows_to_read > rows_to_skip, "Invalid rows_to_read computation.");
-  rows_to_read = std::min(rows_to_read - rows_to_skip, _file_itm_data.rows_to_read);
-  // rows_to_read -= rows_to_skip;
+  CUDF_EXPECTS(rows_to_decode > rows_to_skip, "Invalid rows_to_decode computation.");
+  rows_to_decode = std::min(rows_to_decode - rows_to_skip, _file_itm_data.rows_to_read);
+
+  // After this step, we no longer have any rows to skip.
+  // The number of rows remaining to read in the future is also reduced accordingly.
   _file_itm_data.rows_to_skip = 0;
-  _file_itm_data.rows_to_read -= rows_to_read;
+  _file_itm_data.rows_to_read -= rows_to_decode;
 
 #ifdef LOCAL_TEST
-  printf("decode, skip = %ld, read = %ld\n", rows_to_skip, rows_to_read);
+  printf("decode, skip = %ld, decode = %ld\n", rows_to_skip, rows_to_decode);
 #endif
 
-  CUDF_EXPECTS(rows_to_read <= static_cast(std::numeric_limits::max()),
+  CUDF_EXPECTS(rows_to_decode <= static_cast(std::numeric_limits::max()),
                "Number or rows to decode exceeds the column size limit.",
                std::overflow_error);
@@ -1017,8 +1021,9 @@ void reader::impl::decompress_and_decode()
     _metadata.is_row_grp_idx_present() &&
     // Only use if we don't have much work with complete columns & stripes
     // TODO: Consider nrows, gpu, and tune the threshold
-    (rows_to_read > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) &&
-     _metadata.get_row_index_stride() != 0 && num_columns * stripe_count < 8 * 128) &&
+    (rows_to_decode > _metadata.get_row_index_stride() &&
+     !(_metadata.get_row_index_stride() & 7) && _metadata.get_row_index_stride() != 0 &&
+     num_columns * stripe_count < 8 * 128) &&
     // Only use if first row is aligned to a stripe boundary
     // TODO: Fix logic to handle unaligned rows
     (rows_to_skip == 0);
@@ -1125,7 +1130,7 @@ void reader::impl::decompress_and_decode()
    // (int)chunk.start_row,
    // (int)chunk.num_rows);

-    chunk.column_num_rows = (level == 0) ? rows_to_read : col_meta.num_child_rows[col_idx];
+    chunk.column_num_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[col_idx];
     chunk.parent_validity_info =
       (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx];
     chunk.parent_null_count_prefix_sums =
@@ -1311,7 +1316,7 @@ void reader::impl::decompress_and_decode()
       }
     }
     auto is_list_type = (column_types[i].id() == type_id::LIST);
-    auto n_rows       = (level == 0) ? rows_to_read : col_meta.num_child_rows[i];
+    auto n_rows       = (level == 0) ?
rows_to_decode : col_meta.num_child_rows[i];

    // printf("  create col, num rows: %d\n", (int)n_rows);

From 73c1a193620e004373cac025e0853ce318d9c6c5 Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Mon, 11 Mar 2024 12:55:18 -0700
Subject: [PATCH 220/321] Update `hostdevice_vector.hpp`

Signed-off-by: Nghia Truong 
---
 cpp/src/io/utilities/hostdevice_vector.hpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index af1591b709a..0883ac3609f 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -53,14 +53,12 @@ class hostdevice_vector {
   }
 
   explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream)
-    : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(0, stream)
+    : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(max_size, stream)
   {
     CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size");
 
     h_data.reserve(max_size);
     h_data.resize(initial_size);
-
-    d_data.resize(max_size, stream);
   }
 
   void push_back(T const& data)

From a897155c95d4aed440463077801b84bbeb03bc1f Mon Sep 17 00:00:00 2001
From: Nghia Truong 
Date: Mon, 11 Mar 2024 13:11:29 -0700
Subject: [PATCH 221/321] Optimize `tz_table` parameter usage

Signed-off-by: Nghia Truong 
---
 cpp/src/io/orc/reader_impl_decode.cu | 74 ++++------------------------
 1 file changed, 9 insertions(+), 65 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index e85389e4c9a..ef0ae79dcfb 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -462,7 +462,7 @@ void decode_stream_data(std::size_t num_dicts,
                         int64_t skip_rows,
                         size_type row_index_stride,
                         std::size_t level,
-                        table_view const& tz_table,
+                        table_device_view const& d_tz_table,
                         cudf::detail::hostdevice_2dvector& chunks,
                         cudf::detail::device_2dspan row_groups,
                         std::vector& out_buffers,
@@ -504,7 +504,6 @@ void decode_stream_data(std::size_t num_dicts,
     update_null_mask(chunks, out_buffers, stream, mr);
   }
 
-  auto const tz_table_dptr = table_device_view::create(tz_table, stream);
   rmm::device_scalar error_count(0, stream);
   // Update the null map for child columns
@@ -526,7 +525,7 @@ void decode_stream_data(std::size_t num_dicts,
       num_columns,
       num_stripes,
       skip_rows,
-      *tz_table_dptr,
+      d_tz_table,
       row_groups.size().first,
       row_index_stride,
       level,
@@ -836,7 +835,8 @@ void reader::impl::decompress_and_decode()
     rows_to_decode += stripe_rows;
 
     // The rows to skip should never be larger than number of rows in the first loaded stripes.
-    // This is just to make sure there was not any bug with it.
+    // Technically, overflow here should never happen since `select_stripes` already checked it.
+    // This is just to make sure there was not any bug there.
     if (rows_to_skip > 0) {
       CUDF_EXPECTS(rows_to_skip < stripe_rows, "Invalid rows_to_skip computation.");
     }
@@ -854,6 +854,8 @@ void reader::impl::decompress_and_decode()
   printf("decode, skip = %ld, decode = %ld\n", rows_to_skip, rows_to_decode);
 #endif
 
+  // Technically, overflow here should never happen because the `load_data()` step
+  // already handled it by splitting the loaded stripe range into multiple decode ranges.
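// A rough sketch of the guarantee relied upon below -- a hypothetical splitter, not
// the reader's actual algorithm: while accumulating stripe row counts, a decode range
// is closed before the running total would cross the cudf column row limit.
//
//   int64_t constexpr row_limit = std::numeric_limits<cudf::size_type>::max();
//   int64_t acc = 0;
//   for (auto const rows : stripe_row_counts) {  // stripe_row_counts: hypothetical
//     if (acc + rows > row_limit) { acc = 0; }   // close range, start a new one
//     acc += rows;
//   }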
CUDF_EXPECTS(rows_to_decode <= static_cast(std::numeric_limits::max()), "Number or rows to decode exceeds the column size limit.", std::overflow_error); @@ -871,6 +873,7 @@ void reader::impl::decompress_and_decode() {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) : std::make_unique(); }(); + auto const tz_table_dptr = table_device_view::create(tz_table->view(), _stream); auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; @@ -891,66 +894,7 @@ void reader::impl::decompress_and_decode() // Iterates through levels of nested columns, child column will be one level down // compared to parent column. - auto& col_meta = *_col_meta; - -#if 0 - printf("num_child_rows: (size %d)\n", (int)_col_meta->num_child_rows.size()); - if (_col_meta->num_child_rows.size()) { - for (auto x : _col_meta->num_child_rows) { - printf("%d, ", (int)x); - } - printf("\n"); - - _col_meta->num_child_rows.clear(); - } - - printf("parent_column_data null count: (size %d)\n", (int)_col_meta->parent_column_data.size()); - if (_col_meta->parent_column_data.size()) { - for (auto x : _col_meta->parent_column_data) { - printf("%d, ", (int)x.null_count); - } - printf("\n"); - _col_meta->parent_column_data.clear(); - } - - printf("parent_column_index: (size %d)\n", (int)_col_meta->parent_column_index.size()); - if (_col_meta->parent_column_index.size()) { - for (auto x : _col_meta->parent_column_index) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->parent_column_index.clear(); - } - - printf("child_start_row: (size %d)\n", (int)_col_meta->child_start_row.size()); - if (_col_meta->child_start_row.size()) { - for (auto x : _col_meta->child_start_row) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->child_start_row.clear(); - } - - printf("num_child_rows_per_stripe: (size %d)\n", - (int)_col_meta->num_child_rows_per_stripe.size()); - if (_col_meta->num_child_rows_per_stripe.size()) { - for (auto x : _col_meta->num_child_rows_per_stripe) { - printf("%d, ", (int)x); - } - printf("\n"); - _col_meta->num_child_rows_per_stripe.clear(); - } - - printf("rwgrp_meta: (size %d)\n", (int)_col_meta->rwgrp_meta.size()); - if (_col_meta->rwgrp_meta.size()) { - for (auto x : _col_meta->rwgrp_meta) { - printf("(%d | %d), ", (int)x.start_row, (int)x.num_rows); - } - printf("\n"); - } - -#endif - + auto& col_meta = *_col_meta; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { @@ -1360,7 +1304,7 @@ void reader::impl::decompress_and_decode() rows_to_skip, _metadata.get_row_index_stride(), level, - tz_table->view(), + *tz_table_dptr, chunks, row_groups, _out_buffers[level], From 91f9cce3a51efc64bafae284d1142bcb47ab3746 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 14:09:21 -0700 Subject: [PATCH 222/321] Make `null_count_prefix_sums` local to decoding step Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 3 -- cpp/src/io/orc/reader_impl_decode.cu | 48 ++++++++++++------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index a3883426787..2b35e51b6f1 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -162,9 +162,6 @@ struct file_intermediate_data { // This is used to identify the range of streams for each stripe from that vector. 
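  // For intuition -- hypothetical ranges, assuming get_range() composes the begin of
  // the first selected entry with the end of the last one: if stripe 0 owns streams
  // [0, 4) and stripe 1 owns streams [4, 7), selecting stripes [1, 2) yields [4, 7).
  //
  //   std::vector<range> const ranges{{0, 4}, {4, 7}};
  //   auto const r = get_range(ranges, range{1, 2});  // r.begin == 4, r.end == 7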
std::vector> lvl_stripe_stream_ranges; - // TODO rename - std::vector>> null_count_prefix_sums; - // For data processing, decompression, and decoding. // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. std::vector> lvl_data_chunks; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index ef0ae79dcfb..a9c4f3352f6 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -875,17 +875,17 @@ void reader::impl::decompress_and_decode() }(); auto const tz_table_dptr = table_device_view::create(tz_table->view(), _stream); - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& null_count_prefix_sums = _file_itm_data.null_count_prefix_sums; - auto& lvl_chunks = _file_itm_data.lvl_data_chunks; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - null_count_prefix_sums.clear(); + auto const num_levels = _selected_columns.num_levels(); // TODO: move this to global step lvl_chunks.resize(_selected_columns.num_levels()); - _out_buffers.clear(); - _out_buffers.resize(_selected_columns.num_levels()); + _out_buffers.resize(num_levels); + + std::vector>> null_count_prefix_sums(num_levels); // // // @@ -945,9 +945,10 @@ void reader::impl::decompress_and_decode() } } - auto const num_columns = columns_level.size(); - auto& chunks = lvl_chunks[level]; - chunks = cudf::detail::hostdevice_2dvector(stripe_count, num_columns, _stream); + auto const num_level_columns = columns_level.size(); + auto& chunks = lvl_chunks[level]; + chunks = + cudf::detail::hostdevice_2dvector(stripe_count, num_level_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); #ifdef LOCAL_TEST @@ -967,7 +968,7 @@ void reader::impl::decompress_and_decode() // TODO: Consider nrows, gpu, and tune the threshold (rows_to_decode > _metadata.get_row_index_stride() && !(_metadata.get_row_index_stride() & 7) && _metadata.get_row_index_stride() != 0 && - num_columns * stripe_count < 8 * 128) && + num_level_columns * stripe_count < 8 * 128) && // Only use if first row is aligned to a stripe boundary // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); @@ -979,14 +980,11 @@ void reader::impl::decompress_and_decode() // Logically view streams as columns auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - null_count_prefix_sums.emplace_back(); - null_count_prefix_sums.back().reserve(_selected_columns.levels[level].size()); - std::generate_n(std::back_inserter(null_count_prefix_sums.back()), - _selected_columns.levels[level].size(), - [&]() { - return cudf::detail::make_zeroed_device_uvector_async( - stripe_count, _stream, rmm::mr::get_current_device_resource()); - }); + null_count_prefix_sums[level].reserve(num_level_columns); + std::generate_n(std::back_inserter(null_count_prefix_sums[level]), num_level_columns, [&]() { + return cudf::detail::make_zeroed_device_uvector_async( + stripe_count, _stream, rmm::mr::get_current_device_resource()); + }); // Tracker for eventually deallocating compressed and uncompressed data auto& stripe_data = lvl_stripe_data[level]; @@ -1055,19 +1053,19 @@ void reader::impl::decompress_and_decode() // fflush(stdout); // Update chunks to reference streams pointers - for (std::size_t col_idx = 0; col_idx < num_columns; col_idx++) { + for (std::size_t col_idx = 0; col_idx < num_level_columns; col_idx++) { auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; // start row, number of rows in a each 
stripe and total number of rows // may change in lower levels of nesting chunk.start_row = (level == 0) ? stripe_start_row - : col_meta.child_start_row[(stripe_idx - stripe_start) * num_columns + col_idx]; + : col_meta.child_start_row[(stripe_idx - stripe_start) * num_level_columns + col_idx]; chunk.num_rows = (level == 0) ? static_cast(stripe_info->numberOfRows) - : col_meta - .num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_columns + col_idx]; + : col_meta.num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_level_columns + + col_idx]; // printf("col idx: %d, start_row: %d, num rows: %d\n", // (int)col_idx, @@ -1132,10 +1130,10 @@ void reader::impl::decompress_and_decode() // Process dataset chunk pages into output columns auto row_groups = - cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, _stream); + cudf::detail::hostdevice_2dvector(num_rowgroups, num_level_columns, _stream); if (level > 0 and row_groups.size().first) { cudf::host_span row_groups_span(row_groups.base_host_ptr(), - num_rowgroups * num_columns); + num_rowgroups * num_level_columns); auto& rw_grp_meta = col_meta.rwgrp_meta; // Update start row and num rows per row group @@ -1215,7 +1213,7 @@ void reader::impl::decompress_and_decode() gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), nullptr, chunks.base_device_ptr(), - num_columns, + num_level_columns, stripe_count, _metadata.get_row_index_stride(), level == 0, From dd7e850d83b6ed80f3710d870216f7c754da7ab1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 14:17:20 -0700 Subject: [PATCH 223/321] Make `lvl_chunks` local to decoding step and some cleanup Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 1 - cpp/src/io/orc/reader_impl_chunking.hpp | 4 ---- cpp/src/io/orc/reader_impl_decode.cu | 25 +++++++++++-------------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9ea03fd2e56..a6efbb3c3c4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -310,7 +310,6 @@ void reader::impl::global_preprocess(read_mode mode) lvl_stripe_sizes.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); - _file_itm_data.lvl_data_chunks.resize(num_levels); _out_buffers.resize(num_levels); auto& read_info = _file_itm_data.data_read_info; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2b35e51b6f1..2aa48acb56b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -162,10 +162,6 @@ struct file_intermediate_data { // This is used to identify the range of streams for each stripe from that vector. std::vector> lvl_stripe_stream_ranges; - // For data processing, decompression, and decoding. - // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. 
- std::vector> lvl_data_chunks; - bool global_preprocessed{false}; }; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a9c4f3352f6..7b0b5c0b127 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -875,27 +875,24 @@ void reader::impl::decompress_and_decode() }(); auto const tz_table_dptr = table_device_view::create(tz_table->view(), _stream); - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto& lvl_chunks = _file_itm_data.lvl_data_chunks; - auto const num_levels = _selected_columns.num_levels(); - - // TODO: move this to global step - lvl_chunks.resize(_selected_columns.num_levels()); - _out_buffers.resize(num_levels); + // Column descriptors ('chunks'). + // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. + std::vector> lvl_chunks(num_levels); + + // For computing null count. std::vector>> null_count_prefix_sums(num_levels); + // // // - // TODO: move this to reader_impl.cu, decomp and decode step - // std::size_t num_stripes = selected_stripes.size(); // Iterates through levels of nested columns, child column will be one level down // compared to parent column. - auto& col_meta = *_col_meta; - auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; + auto& col_meta = *_col_meta; + auto const& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { #ifdef LOCAL_TEST @@ -987,7 +984,7 @@ void reader::impl::decompress_and_decode() }); // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = lvl_stripe_data[level]; + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; int64_t stripe_start_row = 0; int64_t num_dict_entries = 0; @@ -1169,7 +1166,7 @@ void reader::impl::decompress_and_decode() auto decomp_data = decompress_stripe_data( _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1], - get_range(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range), + stream_range, stripe_count, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, @@ -1379,7 +1376,7 @@ void reader::impl::decompress_and_decode() for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { _out_buffers[level].clear(); - auto& stripe_data = lvl_stripe_data[level]; + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { stripe_data[stripe_start - load_stripe_start] = {}; From 89a2ac0c646cc91d9b7410428a8673da96a2921d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 14:40:18 -0700 Subject: [PATCH 224/321] Reorder variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 2aa48acb56b..939beb03b2b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -110,18 +110,6 @@ struct file_intermediate_data { // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } - // Store the compression information for each data stream. - stream_source_map compinfo_map; - - // The buffers to store raw data read from disk, initialized for each reading stripe chunks. 
- // After decoding, such buffers can be released. - // This can only be implemented after chunked output is ready. - std::vector> lvl_stripe_data; - - // Store the size of each stripe at each nested level. - // This is used to initialize the stripe_data buffers. - std::vector> lvl_stripe_sizes; - // Store information to identify where to read a chunk of data from source. // Each read corresponds to one or more consecutive streams combined. struct stream_data_read_info { @@ -155,6 +143,9 @@ struct file_intermediate_data { // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. std::vector stripe_data_read_ranges; + // Store the compression information for each data stream. + stream_source_map compinfo_map; + // Store info for each ORC stream at each nested level. std::vector> lvl_stream_info; @@ -162,6 +153,15 @@ struct file_intermediate_data { // This is used to identify the range of streams for each stripe from that vector. std::vector> lvl_stripe_stream_ranges; + // The buffers to store raw data read from disk, initialized for each reading stripe chunks. + // After decoding, such buffers can be released. + // This can only be implemented after chunked output is ready. + std::vector> lvl_stripe_data; + + // Store the size of each stripe at each nested level. + // This is used to initialize the stripe_data buffers. + std::vector> lvl_stripe_sizes; + bool global_preprocessed{false}; }; From c585c44054297d8c45d3baec0ca8395f7c6415bc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 15:20:21 -0700 Subject: [PATCH 225/321] Cleanup and rename variables Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 54 +++++++++++++++----- cpp/src/io/orc/reader_impl_chunking.hpp | 23 ++++++++- cpp/src/io/orc/reader_impl_decode.cu | 66 ++++++------------------- 3 files changed, 79 insertions(+), 64 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index a6efbb3c3c4..f10c5b754c0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -68,7 +68,7 @@ namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( - std::size_t stripe_processing_order, + std::size_t global_stripe_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -77,7 +77,7 @@ std::size_t gather_stream_info_and_column_desc( bool use_index, bool apply_struct_map, int64_t* num_dictionary_entries, - std::size_t* stream_processing_order, + std::size_t* local_stream_order, std::optional*> const& stream_info, std::optional*> const& chunks) { @@ -124,8 +124,8 @@ std::size_t gather_stream_info_and_column_desc( if (child_idx >= 0) { col = child_idx; if (chunks.has_value()) { - auto& chunk = (*chunks.value())[stripe_processing_order][col]; - chunk.strm_id[gpu::CI_PRESENT] = *stream_processing_order; + auto& chunk = (*chunks.value())[global_stripe_order][col]; + chunk.strm_id[gpu::CI_PRESENT] = *local_stream_order; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } } @@ -136,8 +136,8 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[stripe_processing_order][col]; - chunk.strm_id[index_type] = *stream_processing_order; + auto& chunk = (*chunks.value())[global_stripe_order][col]; + chunk.strm_id[index_type] = 
*local_stream_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams chunk.skip_count |= 1 << index_type; @@ -150,13 +150,13 @@ std::size_t gather_stream_info_and_column_desc( } } - (*stream_processing_order)++; + (*local_stream_order)++; } else { // not chunks.has_value() stream_info.value()->emplace_back( stripeinfo->offset + src_offset, dst_offset, stream.length, - stream_source_info{stripe_processing_order, level, column_id, stream.kind}); + stream_source_info{global_stripe_order, level, column_id, stream.kind}); } dst_offset += stream.length; @@ -305,11 +305,15 @@ void reader::impl::global_preprocess(read_mode mode) auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; + auto& lvl_column_types = _file_itm_data.lvl_column_types; + auto& lvl_nested_cols = _file_itm_data.lvl_nested_cols; lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); + lvl_column_types.resize(num_levels); + lvl_nested_cols.resize(num_levels); _out_buffers.resize(num_levels); auto& read_info = _file_itm_data.data_read_info; @@ -322,16 +326,44 @@ void reader::impl::global_preprocess(read_mode mode) // Association between each ORC column and its cudf::column col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1); + auto const& columns_level = _selected_columns.levels[level]; size_type col_id{0}; - for (auto const& col : _selected_columns.levels[level]) { + + for (auto const& col : columns_level) { // Map each ORC column to its column col_meta.orc_col_map[level][col.id] = col_id++; - } - auto const num_columns = _selected_columns.levels[level].size(); + auto const col_type = + to_cudf_type(_metadata.get_col_type(col.id).kind, + _config.use_np_dtypes, + _config.timestamp_type.id(), + to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); + CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); + + auto& column_types = lvl_column_types[level]; + auto& nested_cols = lvl_nested_cols[level]; + + if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or + col_type == type_id::DECIMAL128) { + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto const scale = + -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); + column_types.emplace_back(col_type, scale); + } else { + column_types.emplace_back(col_type); + } + + // Map each ORC column to its column. + if (col_type == type_id::LIST or col_type == type_id::STRUCT) { + nested_cols.emplace_back(col); + } + } // Try to reserve some memory, but the final size is unknown, // since each column may have more than one stream. + auto const num_columns = columns_level.size(); lvl_stream_info[level].reserve(num_total_stripes * num_columns); if (read_info.capacity() < num_total_stripes * num_columns) { read_info.reserve(num_total_stripes * num_columns); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 939beb03b2b..b2964e996c2 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -162,6 +162,12 @@ struct file_intermediate_data { // This is used to initialize the stripe_data buffers. 
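  // As a sketch -- a hypothetical loop, not the reader's exact code -- these sizes
  // drive the per-stripe raw-buffer allocations:
  //
  //   for (std::size_t s = 0; s < num_loaded_stripes; ++s) {
  //     lvl_stripe_data[level].emplace_back(lvl_stripe_sizes[level][s], stream);
  //   }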
std::vector> lvl_stripe_sizes; + // List of column data types at each nested level. + std::vector> lvl_column_types; + + // List of nested type columns at each nested level. + std::vector> lvl_nested_cols; + bool global_preprocessed{false}; }; @@ -292,9 +298,22 @@ range get_range(std::vector const& input_ranges, range const& selected_ra * descriptors (`chunks` is present) during decompression and decoding. The two steps share * most of the execution path thus this function takes mutually exclusive parameters `stream_info` * or `chunks` depending on each use case. + * + * @param global_stripe_order The global index of the current decoding stripe + * @param level The nested level of the current decoding column + * @param stripeinfo The pointer to current decoding stripe's information + * @param stripefooter The pointer to current decoding stripe's footer + * @param orc2gdf The mapping from ORC column ids to gdf column ids + * @param types The schema type + * @param use_index Whether to use the row index for parsing + * @param apply_struct_map Indicating if this is the root level + * @param num_dictionary_entries The number of dictionary entries + * @param local_stream_order For retrieving 0-based orders of streams in the current decoding step + * @param stream_info The vector of streams' information + * @param chunks The vector of column descriptors */ std::size_t gather_stream_info_and_column_desc( - std::size_t stripe_processing_order, + std::size_t global_stripe_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -303,7 +322,7 @@ std::size_t gather_stream_info_and_column_desc( bool use_index, bool apply_struct_map, int64_t* num_dictionary_entries, - std::size_t* stream_processing_order, + std::size_t* local_stream_order, std::optional*> const& stream_info, std::optional*> const& chunks); diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 7b0b5c0b127..195288fb1a9 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -891,8 +891,7 @@ void reader::impl::decompress_and_decode() // Iterates through levels of nested columns, child column will be one level down // compared to parent column. 
- auto& col_meta = *_col_meta; - auto const& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; + auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { #ifdef LOCAL_TEST @@ -906,44 +905,16 @@ void reader::impl::decompress_and_decode() } #endif - auto const& stripe_stream_ranges = lvl_stripe_stream_ranges[level]; + auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); auto& columns_level = _selected_columns.levels[level]; + auto& chunks = lvl_chunks[level]; - // TODO: do it in global step - // Association between each ORC column and its cudf::column - std::vector nested_cols; - - // Get a list of column data types - std::vector column_types; - for (auto& col : columns_level) { - auto col_type = - to_cudf_type(_metadata.get_col_type(col.id).kind, - _config.use_np_dtypes, - _config.timestamp_type.id(), - to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id)); - CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or - col_type == type_id::DECIMAL128) { - // sign of the scale is changed since cuDF follows c++ libraries like CNL - // which uses negative scaling, but liborc and other libraries - // follow positive scaling. - auto const scale = - -static_cast(_metadata.get_col_type(col.id).scale.value_or(0)); - column_types.emplace_back(col_type, scale); - } else { - column_types.emplace_back(col_type); - } - - // Map each ORC column to its column - if (col_type == type_id::LIST or col_type == type_id::STRUCT) { - nested_cols.emplace_back(col); - } - } - + auto const& column_types = _file_itm_data.lvl_column_types[level]; + auto const& nested_cols = _file_itm_data.lvl_nested_cols[level]; auto const num_level_columns = columns_level.size(); - auto& chunks = lvl_chunks[level]; + chunks = cudf::detail::hostdevice_2dvector(stripe_count, num_level_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); @@ -974,24 +945,19 @@ void reader::impl::decompress_and_decode() printf(" use_index: %d\n", (int)use_index); #endif - // Logically view streams as columns - auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - null_count_prefix_sums[level].reserve(num_level_columns); std::generate_n(std::back_inserter(null_count_prefix_sums[level]), num_level_columns, [&]() { return cudf::detail::make_zeroed_device_uvector_async( stripe_count, _stream, rmm::mr::get_current_device_resource()); }); - // Tracker for eventually deallocating compressed and uncompressed data - auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; - - int64_t stripe_start_row = 0; - int64_t num_dict_entries = 0; - int64_t num_rowgroups = 0; + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - // TODO: Stripe and stream idx must be by chunk. 
- std::size_t stream_processing_order = 0; + int64_t stripe_start_row{0}; + int64_t num_dict_entries{0}; + int64_t num_rowgroups{0}; + std::size_t local_stream_order{0}; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { #ifdef LOCAL_TEST @@ -1002,10 +968,8 @@ void reader::impl::decompress_and_decode() auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - // printf("stripeinfo->indexLength: %d, data: %d\n", - // (int)stripe_info->indexLength, - // (int)stripe_info->dataLength); - + // Gather only for the decoding stripes, thus the first parameter (`stripe_processing_order`) + // needs to be normalized to be 0-based. auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, level, stripe_info, @@ -1015,7 +979,7 @@ void reader::impl::decompress_and_decode() use_index, level == 0, &num_dict_entries, - &stream_processing_order, + &local_stream_order, std::nullopt, // stream_info &chunks); From f339b2362f346ecead176afd0d52cda17e00c4ae Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 15:28:47 -0700 Subject: [PATCH 226/321] Reorder code Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 195288fb1a9..2d9c88da8d4 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -885,14 +885,7 @@ void reader::impl::decompress_and_decode() // For computing null count. std::vector>> null_count_prefix_sums(num_levels); - // - // - // - - // Iterates through levels of nested columns, child column will be one level down - // compared to parent column. 
auto& col_meta = *_col_meta; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { #ifdef LOCAL_TEST printf("processing level = %d\n", (int)level); @@ -908,13 +901,15 @@ void reader::impl::decompress_and_decode() auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); - auto& columns_level = _selected_columns.levels[level]; - auto& chunks = lvl_chunks[level]; + auto const& columns_level = _selected_columns.levels[level]; + auto const& stream_info = _file_itm_data.lvl_stream_info[level]; + auto const& column_types = _file_itm_data.lvl_column_types[level]; + auto const& nested_cols = _file_itm_data.lvl_nested_cols[level]; - auto const& column_types = _file_itm_data.lvl_column_types[level]; - auto const& nested_cols = _file_itm_data.lvl_nested_cols[level]; - auto const num_level_columns = columns_level.size(); + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; + auto& chunks = lvl_chunks[level]; + auto const num_level_columns = columns_level.size(); chunks = cudf::detail::hostdevice_2dvector(stripe_count, num_level_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); @@ -951,9 +946,6 @@ void reader::impl::decompress_and_decode() stripe_count, _stream, rmm::mr::get_current_device_resource()); }); - auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; - auto const& stream_info = _file_itm_data.lvl_stream_info[level]; - int64_t stripe_start_row{0}; int64_t num_dict_entries{0}; int64_t num_rowgroups{0}; From 9a2cee0058880c0833827fa9c2627b209bc92c36 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Mar 2024 15:52:36 -0700 Subject: [PATCH 227/321] More cleanup and code reordering Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 102 +++++++++------------------ 1 file changed, 34 insertions(+), 68 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2d9c88da8d4..32e85e41851 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -816,8 +816,10 @@ void reader::impl::decompress_and_decode() auto const stripe_count = stripe_range.end - stripe_range.begin; // The start index of loaded stripes. They are different from decoding stripes. - auto const load_stripe_start = - _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1].begin; + CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "There is not any stripe loaded."); + auto const load_stripe_range = + _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1]; + auto const load_stripe_start = load_stripe_range.begin; #ifdef LOCAL_TEST printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); @@ -946,6 +948,7 @@ void reader::impl::decompress_and_decode() stripe_count, _stream, rmm::mr::get_current_device_resource()); }); + // 0-based counters, used accross all decoding stripes in this step. int64_t stripe_start_row{0}; int64_t num_dict_entries{0}; int64_t num_rowgroups{0}; @@ -960,8 +963,8 @@ void reader::impl::decompress_and_decode() auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - // Gather only for the decoding stripes, thus the first parameter (`stripe_processing_order`) - // needs to be normalized to be 0-based. 
+ // Gather only for the decoding stripes, thus the first parameter (`global_stripe_order`) + // needs to be normalized to 0-based. auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, level, stripe_info, @@ -983,29 +986,20 @@ void reader::impl::decompress_and_decode() CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - // TODO: Wrong? - // stripe load_stripe_start? - auto dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data()); - - // printf("line %d\n", __LINE__); - // fflush(stdout); - + auto const dst_base = + static_cast(stripe_data[stripe_idx - load_stripe_start].data()); auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows); + auto const rowgroup_id = num_rowgroups; + auto const stripe_num_rowgroups = + use_index ? (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + _metadata.get_row_index_stride() + : 0; + #ifdef LOCAL_TEST printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); #endif - auto const rowgroup_id = num_rowgroups; - auto stripe_num_rowgroups = 0; - if (use_index) { - stripe_num_rowgroups = (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / - _metadata.get_row_index_stride(); - } - - // printf("line %d\n", __LINE__); - // fflush(stdout); - - // Update chunks to reference streams pointers + // Update chunks to reference streams pointers. for (std::size_t col_idx = 0; col_idx < num_level_columns; col_idx++) { auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; // start row, number of rows in a each stripe and total number of rows @@ -1016,15 +1010,9 @@ void reader::impl::decompress_and_decode() : col_meta.child_start_row[(stripe_idx - stripe_start) * num_level_columns + col_idx]; chunk.num_rows = (level == 0) - ? static_cast(stripe_info->numberOfRows) + ? num_rows_per_stripe : col_meta.num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_level_columns + col_idx]; - - // printf("col idx: %d, start_row: %d, num rows: %d\n", - // (int)col_idx, - // (int)chunk.start_row, - // (int)chunk.num_rows); - chunk.column_num_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[col_idx]; chunk.parent_validity_info = (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx]; @@ -1036,8 +1024,6 @@ void reader::impl::decompress_and_decode() chunk.type_kind = _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind; - // printf("type: %d\n", (int)chunk.type_kind); - // num_child_rows for a struct column will be same, for other nested types it will be // calculated. chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; @@ -1054,7 +1040,6 @@ void reader::impl::decompress_and_decode() ? sizeof(size_type) : cudf::size_of(column_types[col_idx]); chunk.num_rowgroups = stripe_num_rowgroups; - // printf("stripe_num_rowgroups: %d\n", (int)stripe_num_rowgroups); if (chunk.type_kind == orc::TIMESTAMP) { chunk.timestamp_type_id = _config.timestamp_type.id(); @@ -1067,21 +1052,13 @@ void reader::impl::decompress_and_decode() } } - // printf("line %d\n", __LINE__); - // fflush(stdout); - stripe_start_row += num_rows_per_stripe; num_rowgroups += stripe_num_rowgroups; - - // stripe_idx++; - } // for (stripe : selected_stripes) - - // printf("line %d\n", __LINE__); - // fflush(stdout); + } if (stripe_data.empty()) { continue; } - // Process dataset chunk pages into output columns + // Process dataset chunks into output columns. 
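    // For reference, the rowgroup tally accumulated above is a ceiling division per
    // stripe; with hypothetical numbers, for illustration only:
    //
    //   int64_t const num_rows_per_stripe = 25000;
    //   size_type const row_index_stride  = 10000;
    //   auto const n = (num_rows_per_stripe + row_index_stride - 1) / row_index_stride;
    //   // n == 3 rowgroups: two full rowgroups of 10000 rows and a final one of 5000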
   auto row_groups =
     cudf::detail::hostdevice_2dvector(num_rowgroups, num_level_columns, _stream);
   if (level > 0 and row_groups.size().first) {
@@ -1101,16 +1078,8 @@ void reader::impl::decompress_and_decode()
     });
   }

-  // printf("line %d\n", __LINE__);
-  // fflush(stdout);
-
-  // Setup row group descriptors if using indexes
+  // Setup row group descriptors if using indexes.
   if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
-    // printf("decompress----------------------\n");
-    // printf("line %d\n", __LINE__);
-    // fflush(stdout);
-    CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "ERRRRR");
-
 #ifdef LOCAL_TEST
     {
       _stream.synchronize();
@@ -1120,23 +1089,20 @@ void reader::impl::decompress_and_decode()
     }
 #endif

-    auto decomp_data = decompress_stripe_data(
-      _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1],
-      stream_range,
-      stripe_count,
-      _file_itm_data.compinfo_map,
-      *_metadata.per_file_metadata[0].decompressor,
-      stripe_data,
-      stream_info,
-      chunks,
-      row_groups,
-      _metadata.get_row_index_stride(),
-      level == 0,
-      _stream);
-    // stripe_data.clear();
-    // stripe_data.push_back(std::move(decomp_data));
-
-    // TODO: only reset each one if the new size/type are different.
+    auto decomp_data = decompress_stripe_data(load_stripe_range,
+                                              stream_range,
+                                              stripe_count,
+                                              _file_itm_data.compinfo_map,
+                                              *_metadata.per_file_metadata[0].decompressor,
+                                              stripe_data,
+                                              stream_info,
+                                              chunks,
+                                              row_groups,
+                                              _metadata.get_row_index_stride(),
+                                              level == 0,
+                                              _stream);
+
+    // Just save the decompressed data and clear out the raw data to free up memory.
     stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data);
     for (std::size_t i = 1; i < stripe_count; ++i) {
       stripe_data[i + stripe_start - load_stripe_start] = {};

From a0d152886b9d79f1bb92930ac1abc8a0f5a645ab Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 15:59:43 -0700
Subject: [PATCH 228/321] Update docs

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index b2964e996c2..d43b5342eba 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -311,6 +311,7 @@ range get_range(std::vector const& input_ranges, range const& selected_ra
  * @param local_stream_order For retrieving 0-based orders of streams in the current decoding step
  * @param stream_info The vector of streams' information
  * @param chunks The vector of column descriptors
+ * @return The number of bytes in the gathered streams
  */
 std::size_t gather_stream_info_and_column_desc(
   std::size_t global_stripe_order,

From 75a96d1b0ba021fe3a2b6e86af771b4abeaa6b6b Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:05:58 -0700
Subject: [PATCH 229/321] Change variable types

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu |  3 +-
 cpp/src/io/orc/reader_impl_decode.cu   | 40 +++++++-------------------
 2 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index f10c5b754c0..075002276b8 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -145,7 +145,8 @@ std::size_t gather_stream_info_and_column_desc(
       if (index_type == gpu::CI_DICTIONARY) {
         chunk.dictionary_start = *num_dictionary_entries;
         chunk.dict_len         = stripefooter->columns[column_id].dictionarySize;
-        *num_dictionary_entries += stripefooter->columns[column_id].dictionarySize;
+        *num_dictionary_entries +=
+          static_cast(stripefooter->columns[column_id].dictionarySize);
       }
     }
   }
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 32e85e41851..45e9bcd7265 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -458,7 +458,7 @@ void update_null_mask(cudf::detail::hostdevice_2dvector& chunks
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource to use for device memory allocation
  */
-void decode_stream_data(std::size_t num_dicts,
+void decode_stream_data(int64_t num_dicts,
                         int64_t skip_rows,
                         size_type row_index_stride,
                         std::size_t level,
@@ -951,7 +951,7 @@ void reader::impl::decompress_and_decode()
   // 0-based counters, used accross all decoding stripes in this step.
   int64_t stripe_start_row{0};
   int64_t num_dict_entries{0};
-  int64_t num_rowgroups{0};
+  uint32_t num_rowgroups{0};
   std::size_t local_stream_order{0};

   for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) {
@@ -989,8 +989,9 @@ void reader::impl::decompress_and_decode()
     auto const dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data());
     auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows);
-    auto const rowgroup_id    = num_rowgroups;
-    auto const stripe_num_rowgroups =
+
+    uint32_t const rowgroup_id = num_rowgroups;
+    uint32_t const stripe_num_rowgroups =
       use_index ? (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) /
                     _metadata.get_row_index_stride()
                 : 0;
@@ -1117,15 +1118,8 @@ void reader::impl::decompress_and_decode()
     }
 #endif

-    // printf("line %d\n", __LINE__);
-    // fflush(stdout);
-
   } else {
-    // printf("no decompression----------------------\n");
-
     if (row_groups.size().first) {
-      // printf("line %d\n", __LINE__);
-      // fflush(stdout);
       chunks.host_to_device_async(_stream);
       row_groups.host_to_device_async(_stream);
       row_groups.host_to_device_async(_stream);
@@ -1140,9 +1134,6 @@ void reader::impl::decompress_and_decode()
     }
   }

-  // printf("line %d\n", __LINE__);
-  // fflush(stdout);
-
 #ifdef LOCAL_TEST
   {
     _stream.synchronize();
@@ -1152,8 +1143,6 @@ void reader::impl::decompress_and_decode()
   }
 #endif

-  // TODO: do not clear but reset each one.
-  // and only reset if the new size/type are different.
   _out_buffers[level].clear();

 #ifdef LOCAL_TEST
@@ -1176,10 +1165,9 @@ void reader::impl::decompress_and_decode()
         break;
       }
     }
-    auto is_list_type = (column_types[i].id() == type_id::LIST);
-    auto n_rows       = (level == 0) ? rows_to_decode : col_meta.num_child_rows[i];

-    // printf("   create col, num rows: %d\n", (int)n_rows);
+    auto const is_list_type = (column_types[i].id() == type_id::LIST);
+    auto const n_rows       = (level == 0) ? rows_to_decode : col_meta.num_child_rows[i];

 #ifdef LOCAL_TEST
     {
@@ -1190,9 +1178,9 @@ void reader::impl::decompress_and_decode()
     }
 #endif

-    // For list column, offset column will be always size + 1
-    if (is_list_type) n_rows++;
-    _out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, _stream, _mr);
+    // For list column, offset column will be always size + 1.
+    _out_buffers[level].emplace_back(
+      column_types[i], is_list_type ? n_rows + 1 : n_rows, is_nullable, _stream, _mr);

 #ifdef LOCAL_TEST
     {
@@ -1205,9 +1193,6 @@ void reader::impl::decompress_and_decode()
     }
 #endif
   }

-  // printf("line %d\n", __LINE__);
-  // fflush(stdout);
-
 #ifdef LOCAL_TEST
   {
     _stream.synchronize();
@@ -1237,15 +1222,12 @@ void reader::impl::decompress_and_decode()
   }
 #endif

-  // printf("line %d\n", __LINE__);
-  // fflush(stdout);
-
   if (nested_cols.size()) {
 #ifdef LOCAL_TEST
     printf("have nested col\n");
 #endif

-    // Extract information to process nested child columns
+    // Extract information to process nested child columns.
     scan_null_counts(chunks, null_count_prefix_sums[level], _stream);

     row_groups.device_to_host_sync(_stream);
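
A note on the `is_list_type ? n_rows + 1 : n_rows` sizing in the patch above: in cudf's columnar layout, the offsets child of a list column holds one more entry than the row count, so that row i spans [offsets[i], offsets[i+1]). A minimal standalone illustration of that invariant (the values are made up for the example):

    #include <cstdint>
    #include <vector>

    int main()
    {
      // 3 list rows -> 4 offset entries; row i covers [offsets[i], offsets[i+1]).
      std::vector<int32_t> offsets{0, 2, 2, 5};  // row sizes: 2, 0, 3
      // This is why the output buffer for a list column is allocated with
      // n_rows + 1 entries instead of n_rows.
      return 0;
    }
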
From 96274abe9f5fa1c5d120428ae5d79623b7e39fef Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:16:35 -0700
Subject: [PATCH 230/321] More cleanup

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 45e9bcd7265..c61b5f00bf8 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -1246,9 +1246,6 @@ void reader::impl::decompress_and_decode()

       if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); }
     }
-
-    // printf("line %d\n", __LINE__);
-    // fflush(stdout);
   }  // end loop level

 #ifdef LOCAL_TEST
@@ -1260,6 +1257,7 @@ void reader::impl::decompress_and_decode()
   }
 #endif

+  // Now generate a table from the decoded result.
   std::vector> out_columns;
   _out_metadata = get_meta_with_user_data();
   std::transform(
@@ -1274,14 +1272,11 @@ void reader::impl::decompress_and_decode()
     });
   _chunk_read_data.decoded_table = std::make_unique<table>(std::move(out_columns));

-  // TODO: do not clear but reset each one.
-  // and only reset if the new size/type are different.
-  // This clear is just to check if there is memory leak.
+  // Free up memory.
   for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) {
     _out_buffers[level].clear();

     auto& stripe_data = _file_itm_data.lvl_stripe_data[level];
-
     if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
       stripe_data[stripe_start - load_stripe_start] = {};
     } else {
@@ -1300,12 +1295,6 @@ void reader::impl::decompress_and_decode()
   }
 #endif

-  // printf("col: \n");
-  // cudf::test::print(_chunk_read_data.decoded_table->get_column(0).view());
-
-  // DEBUG only
-  // _chunk_read_data.output_size_limit = _chunk_read_data.data_read_limit / 3;
-
   _chunk_read_data.curr_output_table_range = 0;
   _chunk_read_data.output_table_ranges =
     _chunk_read_data.output_size_limit == 0

From 246dd5bc17b6348f37511bc5b650b74da5c28553 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:22:07 -0700
Subject: [PATCH 231/321] Complete cleaning up

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 57770cec4fe..7835180e0b4 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -130,10 +130,13 @@ table_with_metadata reader::impl::make_output_chunk()

   auto out_table = [&] {
     if (_chunk_read_data.output_table_ranges.size() == 1) {
+      // Must change the index of output range, so calling `has_next()` after that
+      // can return the correct answer.
       _chunk_read_data.curr_output_table_range++;
 #ifdef LOCAL_TEST
       printf("one chunk, no more table---------------------------------\n");
 #endif
+      // If there is no slicing, just hand over the decoded table.
       return std::move(_chunk_read_data.decoded_table);
     }

@@ -146,11 +149,11 @@ table_with_metadata reader::impl::make_output_chunk()
     }
 #endif

-    auto const out_chunk =
+    auto const out_range =
       _chunk_read_data.output_table_ranges[_chunk_read_data.curr_output_table_range++];
     auto const out_tview = cudf::detail::slice(
       _chunk_read_data.decoded_table->view(),
-      {static_cast(out_chunk.begin), static_cast(out_chunk.end)},
+      {static_cast(out_range.begin), static_cast(out_range.end)},
       _stream)[0];

 #ifdef LOCAL_TEST
@@ -164,7 +167,7 @@ table_with_metadata reader::impl::make_output_chunk()

     auto output = std::make_unique<table>(out_tview, _stream, _mr);

-    // If this is the last slice, we also delete the decoded_table to free up memory.
+    // If this is the last slice, we also delete the decoded table to free up memory.
     if (!_chunk_read_data.more_table_chunk_to_output()) {
       _chunk_read_data.decoded_table.reset(nullptr);
     }

From 027f899f2e57d36309de4acc9099dd44513e9091 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:26:33 -0700
Subject: [PATCH 232/321] Revert error message

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 7835180e0b4..ab24b7c1eaf 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -276,7 +276,7 @@ reader::impl::impl(std::size_t output_size_limit,
   // Selected columns at different levels of nesting are stored in different elements
   // of `selected_columns`; thus, size == 1 means no nested columns.
   CUDF_EXPECTS(_config.skip_rows == 0 or _selected_columns.num_levels() == 1,
-               "skip_rows is not supported if having nested columns");
+               "skip_rows is not supported by nested column");
 }

 table_with_metadata reader::impl::read()

From 961d4680443e945f0938c7d8cc05f6f44bf1ff29 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:29:27 -0700
Subject: [PATCH 233/321] Revert error handling that may be wrong

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index c61b5f00bf8..022e776ed10 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -186,9 +186,8 @@ rmm::device_buffer decompress_stripe_data(
   rmm::device_buffer decomp_data(
     cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream);

-  // If total_decomp_size is zero, the data should not be compressed, and this function
-  // should not be called at all.
-  CUDF_EXPECTS(!decomp_data.is_empty(), "Invalid decompression size");
+  // If total_decomp_size is zero, the input data may be just empty.
+  if (decomp_data.is_empty()) { return decomp_data; }

   rmm::device_uvector> inflate_in(
     num_compressed_blocks + num_uncompressed_blocks, stream);

From 30b589925bff2ff1ae5150780e7ba7b45de5fccc Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 16:52:20 -0700
Subject: [PATCH 234/321] Fix spell

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 022e776ed10..82ca39e6e57 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -856,7 +856,7 @@ void reader::impl::decompress_and_decode()
 #endif

   // Technically, overflow here should never happen because the `load_data()` step
-  // already handled it by spliting the loaded stripe range into multiple decode ranges.
+  // already handled it by splitting the loaded stripe range into multiple decode ranges.
   CUDF_EXPECTS(rows_to_decode <= static_cast(std::numeric_limits::max()),
                "Number or rows to decode exceeds the column size limit.",
                std::overflow_error);
@@ -947,7 +947,7 @@ void reader::impl::decompress_and_decode()
                    stripe_count, _stream, rmm::mr::get_current_device_resource());
   });

-  // 0-based counters, used accross all decoding stripes in this step.
+  // 0-based counters, used across all decoding stripes in this step.
   int64_t stripe_start_row{0};
   int64_t num_dict_entries{0};
   uint32_t num_rowgroups{0};

From 40b28faedb1220d357ef042d47eee5d6643f1ae0 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 21:06:19 -0700
Subject: [PATCH 235/321] Update python code

Signed-off-by: Nghia Truong
---
 python/cudf/cudf/_lib/cpp/io/orc.pxd | 14 +++++++-------
 python/cudf/cudf/_lib/orc.pyx        |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index d5ac8574fe4..93e3f61142d 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2023, NVIDIA CORPORATION.

-from libc.stdint cimport uint8_t
+from libc.stdint cimport uint8_t, int64_t
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport shared_ptr, unique_ptr
@@ -21,8 +21,8 @@ cdef extern from "cudf/io/orc.hpp" \
         cudf_io_types.source_info get_source() except +
         vector[vector[size_type]] get_stripes() except +
-        size_type get_skip_rows() except +
-        size_type get_num_rows() except +
+        int64_t get_skip_rows() except +
+        optional[int64_t] get_num_rows() except +
         bool is_enabled_use_index() except +
         bool is_enabled_use_np_dtypes() except +
         data_type get_timestamp_type() except +
@@ -31,8 +31,8 @@ cdef extern from "cudf/io/orc.hpp" \

         void set_columns(vector[string] col_names) except +
         void set_stripes(vector[vector[size_type]] strps) except +
-        void set_skip_rows(size_type rows) except +
-        void set_num_rows(size_type nrows) except +
+        void set_skip_rows(int64_t rows) except +
+        void set_num_rows(int64_t nrows) except +
         void enable_use_index(bool val) except +
         void enable_use_np_dtypes(bool val) except +
         void set_timestamp_type(data_type type) except +
@@ -49,8 +49,8 @@ cdef extern from "cudf/io/orc.hpp" \
         orc_reader_options_builder& columns(vector[string] col_names) except +
         orc_reader_options_builder& \
             stripes(vector[vector[size_type]] strps) except +
-        orc_reader_options_builder& skip_rows(size_type rows) except +
-        orc_reader_options_builder& num_rows(size_type nrows) except +
+        orc_reader_options_builder& skip_rows(int64_t rows) except +
+        orc_reader_options_builder& num_rows(int64_t nrows) except +
         orc_reader_options_builder& use_index(bool val) except +
         orc_reader_options_builder& use_np_dtypes(bool val) except +
         orc_reader_options_builder& timestamp_type(data_type type) except +
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index 3fc9823b914..aaaeb558846 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -325,7 +325,7 @@ cdef int64_t get_skiprows_arg(object arg) except*:
         raise TypeError("skiprows must be an int >= 0")
     return arg

-cdef size_type get_num_rows_arg(object arg) except*:
+cdef int64_t get_num_rows_arg(object arg) except*:
     arg = -1 if arg is None else arg
     if not isinstance(arg, int) or arg < -1:
         raise TypeError("num_rows must be an int >= -1")
@@ -337,7 +337,7 @@ cdef orc_reader_options make_orc_reader_options(
     object column_names,
     object stripes,
     int64_t skip_rows,
-    size_type num_rows,
+    int64_t num_rows,
     type_id timestamp_type,
     bool use_index
 ) except*:
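
The C++ options mirror these Cython declarations, so after this patch a row window beyond the 32-bit limit can be requested from either side. A sketch of the corresponding C++ builder calls (the file path is a placeholder for the example):

    #include <cudf/io/orc.hpp>

    auto const opts =
      cudf::io::orc_reader_options::builder(cudf::io::source_info{"/tmp/example.orc"})
        .skip_rows(3'000'000'000LL)  // now int64_t, may exceed the int32 range
        .num_rows(1'000'000LL)
        .build();
    auto const result = cudf::io::read_orc(opts);
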
From 10945a6d212380253d6921ccbca8a0c78989ce91 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 11 Mar 2024 21:18:18 -0700
Subject: [PATCH 236/321] Update copyright year

Signed-off-by: Nghia Truong
---
 python/cudf/cudf/_lib/cpp/io/orc.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index 93e3f61142d..93e481be760 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.

 from libc.stdint cimport uint8_t, int64_t
 from libcpp cimport bool

From de5cf15829d8f89b920b3df3a4a515c1d25b12e0 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 09:13:02 -0700
Subject: [PATCH 237/321] Fix style

Signed-off-by: Nghia Truong
---
 python/cudf/cudf/_lib/cpp/io/orc.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index 93e481be760..d5bb1726a43 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.

-from libc.stdint cimport uint8_t, int64_t
+from libc.stdint cimport int64_t, uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport shared_ptr, unique_ptr

From 10945a6d212380253d6921ccbca8a0c78989ce91 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 09:50:19 -0700
Subject: [PATCH 238/321] Change benchmark

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 37 +++++++++++++++++-----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 0503ede62ed..94327d460ae 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -123,10 +123,8 @@ void BM_orc_read_data(nvbench::state& state,
   orc_read_common(num_rows_written, source_sink, state);
 }

-template
-void BM_orc_read_io_compression(
-  nvbench::state& state,
-  nvbench::type_list, nvbench::enum_type>)
+template
+void orc_read_io_compression(nvbench::state& state)
 {
   auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED),
                                          static_cast(data_type::FLOAT),
@@ -154,14 +152,29 @@ void BM_orc_read_io_compression(
     return view.num_rows();
   }();

-  auto const is_chunked_read = static_cast(state.get_int64("chunked_read"));
-  if (is_chunked_read) {
+  if constexpr (chunked_read) {
     orc_read_common(num_rows_written, source_sink, state);
   } else {
     orc_read_common(num_rows_written, source_sink, state);
   }
 }

+template
+void BM_orc_read_io_compression(
+  nvbench::state& state,
+  nvbench::type_list, nvbench::enum_type>)
+{
+  return orc_read_io_compression(state);
+}
+
+template
+void BM_orc_chunked_read_io_compression(
+  nvbench::state& state,
+  nvbench::type_list, nvbench::enum_type>)
+{
+  return orc_read_io_compression(state);
+}
+
 using d_type_list = nvbench::enum_type_list

From 7b890947acf9f9fbb411710431a2ea47f901e4a9 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 10:17:23 -0700
Subject: [PATCH 239/321] Change benchmark

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 36 +++++++++++-----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 94327d460ae..140fbf7f8aa 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -47,14 +47,11 @@ void read_once(cudf::io::orc_reader_options const& options,
 template
 void chunked_read(cudf::io::orc_reader_options const& options,
                   cudf::size_type num_rows_to_read,
-                  cudf::size_type appox_num_chunks,
+                  std::size_t output_limit,
+                  std::size_t read_limit,
                   Timer& timer)
 {
-  // Create a chunked reader that has an internal memory limits to process around 10 chunks.
-  auto const output_limit = static_cast(data_size / appox_num_chunks);
-  auto const input_limit  = output_limit * 10;
-
-  auto reader = cudf::io::chunked_orc_reader(output_limit, input_limit, options);
+  auto reader = cudf::io::chunked_orc_reader(output_limit, read_limit, options);

   cudf::size_type num_rows{0};
   timer.start();
@@ -74,20 +71,21 @@ void orc_read_common(cudf::size_type num_rows_to_read,
 {
   auto const read_opts =
     cudf::io::orc_reader_options::builder(source_sink.make_source_info()).build();
-  cudf::size_type constexpr approx_num_chunks = 10;

   auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch&, auto& timer) {
-               try_drop_l3_cache();
-
-               if constexpr (!is_chunked_read) {
-                 read_once(read_opts, num_rows_to_read, timer);
-               } else {
-                 chunked_read(read_opts, num_rows_to_read, approx_num_chunks, timer);
-               }
-             });
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) {
+      try_drop_l3_cache();
+
+      if constexpr (!is_chunked_read) {
+        read_once(read_opts, num_rows_to_read, timer);
+      } else {
+        auto const output_limit = static_cast(state.get_int64("output_limit"));
+        auto const read_limit   = static_cast(state.get_int64("read_limit"));
+        chunked_read(read_opts, num_rows_to_read, output_limit, read_limit, timer);
+      }
+    });

   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast(data_size) / time, "bytes_per_second");
@@ -213,4 +211,6 @@ NVBENCH_BENCH_TYPES(BM_orc_chunked_read_io_compression,
   .set_type_axes_names({"io", "compression"})
   .set_min_samples(4)
   .add_int64_axis("cardinality", {0, 1000})
-  .add_int64_axis("run_length", {1, 32});
+  .add_int64_axis("run_length", {1, 32})
+  .add_int64_axis("output_limit", {0, 500'000})
+  .add_int64_axis("read_limit", {0, 500'000});

From bc34e40013cc5305d21b1b2a41fcbf4f5b15fd5e Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 15:56:51 -0700
Subject: [PATCH 240/321] Fix python code

Signed-off-by: Nghia Truong
---
 python/cudf/cudf/_lib/orc.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index aaaeb558846..d3c75823471 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -329,7 +329,7 @@ cdef int64_t get_num_rows_arg(object arg) except*:
     arg = -1 if arg is None else arg
     if not isinstance(arg, int) or arg < -1:
         raise TypeError("num_rows must be an int >= -1")
-    return arg
+    return arg
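
For reference, the read loop that `chunked_read` above times follows the usual chunked-reader consumption pattern; roughly (the byte limits here are just the benchmark's new axis values):

    #include <cudf/io/orc.hpp>

    cudf::io::chunked_orc_reader reader(
      500'000 /*output_limit, bytes*/, 500'000 /*read_limit, bytes*/, options);
    cudf::size_type num_rows = 0;
    do {
      auto chunk = reader.read_chunk();   // returns a table_with_metadata
      num_rows += chunk.tbl->num_rows();  // consume, then drop the chunk to free memory
    } while (reader.has_next());
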
From de5ce7811622834bdf04887870642c0c759320a6 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 16:07:04 -0700
Subject: [PATCH 241/321] Fix spell

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 140fbf7f8aa..8514af28c63 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -204,7 +204,7 @@ NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list))
   .add_int64_axis("cardinality", {0, 1000})
   .add_int64_axis("run_length", {1, 32});

-// Should have the same parameters as `BM_orc_read_io_compression` for comparision.
+// Should have the same parameters as `BM_orc_read_io_compression` for comparison.
 NVBENCH_BENCH_TYPES(BM_orc_chunked_read_io_compression,
                     NVBENCH_TYPE_AXES(io_list, compression_list))
   .set_name("orc_chunked_read_io_compression")

From 56750bd4d442d92f2e6463f9fd8b8b84c7afca06 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Tue, 12 Mar 2024 22:10:58 -0700
Subject: [PATCH 242/321] Disable mem stat

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu  | 2 ++
 cpp/src/io/orc/reader_impl.hpp | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index ab24b7c1eaf..82e1b1220ba 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -256,7 +256,9 @@ reader::impl::impl(std::size_t output_size_limit,
                    rmm::mr::device_memory_resource* mr)
   : _stream(stream),
     _mr(mr),
+#ifdef LOCAL_TEST
     mem_stats_logger(mr),
+#endif
     _config{options.get_timestamp_type(),
             options.is_enabled_use_index(),
             options.is_enabled_use_np_dtypes(),
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index b3f91e5e92a..2173b90d30a 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -34,13 +34,12 @@

 namespace cudf::io::orc::detail {

+#ifdef LOCAL_TEST
 class memory_stats_logger {
  public:
   explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) : existing_mr(mr)
   {
-#ifdef LOCAL_TEST
     printf("exist mr: %p\n", mr);
-#endif

     statistics_mr =
       std::make_unique>(
@@ -62,6 +61,7 @@ class memory_stats_logger {
   rmm::mr::statistics_resource_adaptor>
     statistics_mr;
 };
+#endif

 struct reader_column_meta;

@@ -184,7 +184,9 @@ class reader::impl {
   rmm::cuda_stream_view const _stream;
   rmm::mr::device_memory_resource* const _mr;

+#ifdef LOCAL_TEST
   memory_stats_logger mem_stats_logger;
+#endif

   // Reader configs.
   struct {

From c97150ec59e409c698d0371470863b931eec06cf Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 14 Mar 2024 21:54:58 -0700
Subject: [PATCH 243/321] Change memory limits for data loading and decoding

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  |  2 +-
 cpp/src/io/orc/reader_impl_chunking.hpp | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 075002276b8..f58b702ddac 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -748,7 +748,7 @@ void reader::impl::load_data()
     // If `data_read_limit` is too small, make sure not to pass 0 byte limit to compute splits.
     auto const tmp = static_cast(_chunk_read_data.data_read_limit *
-                                 (1.0 - chunk_read_data::load_limit_ratio));
+                                 chunk_read_data::decode_limit_ratio);
     return tmp > 0UL ? tmp : 1UL;
   }();
   _chunk_read_data.decode_stripe_ranges =
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index d43b5342eba..7e1e08e2d91 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -184,13 +184,18 @@ struct chunk_read_data {
   {
   }

-  // TODO: const for 3 below?
   std::size_t const output_size_limit;  // maximum size (in bytes) of an output chunk, or 0 for no limit
-  std::size_t const data_read_limit;    // approximate maximum size (in bytes) used for store
-                                        // intermediate data, or 0 for no limit
-  size_type const output_row_granularity;  // TODO
-  static double constexpr load_limit_ratio{0.4};  // TODO
+  std::size_t const data_read_limit;    // approximate maximum size (in bytes) used for store
+                                        // intermediate data, or 0 for no limit
+  size_type const output_row_granularity;
+
+  // Memory limits for loading data and decoding are computed as
+  // `load/decode_limit_ratio * data_read_limit`.
+  // This is to maintain the total memory usage to be **around** the given `data_read_limit`.
+  // Note that sum of these limits may not be `1.0`, and their values are set empirically.
+  static double constexpr load_limit_ratio{0.25};
+  static double constexpr decode_limit_ratio{0.6};

   // Chunks of stripes that can be load into memory such that their data size is within a size
   // limit.
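
To make the effect of the new constants concrete, the byte budgets handed to the stripe-splitting steps derive from the user-facing `data_read_limit` roughly as follows; a standalone sketch of the arithmetic (not the reader code itself), including the "never pass a 0-byte limit" clamping seen above:

    #include <algorithm>
    #include <cstddef>

    // ~25% of the read limit for raw stripe data held in memory at once.
    std::size_t load_budget(std::size_t data_read_limit)
    {
      return std::max<std::size_t>(1, static_cast<std::size_t>(data_read_limit * 0.25));
    }

    // ~60% of the read limit for decompression/decoding intermediates.
    std::size_t decode_budget(std::size_t data_read_limit)
    {
      return std::max<std::size_t>(1, static_cast<std::size_t>(data_read_limit * 0.6));
    }
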
From 710734cbdb4b48f30dda6b071d0f6c7cc7ff2708 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 14 Mar 2024 22:10:12 -0700
Subject: [PATCH 244/321] Fix tests due to changing internal parameters

Signed-off-by: Nghia Truong
---
 cpp/tests/io/orc_chunked_reader_test.cu | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu
index 862324e5aa8..78f0894134c 100644
--- a/cpp/tests/io/orc_chunked_reader_test.cu
+++ b/cpp/tests/io/orc_chunked_reader_test.cu
@@ -1066,7 +1066,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SingleFixedWidthColumn)
   }

   {
-    int constexpr expected[] = {10, 13, 10};
+    int constexpr expected[] = {17, 13, 10};
     input_limit_test_read(
       __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected);
   }
@@ -1102,7 +1102,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumns)
   }

   {
-    int constexpr expected[] = {10, 50, 15};
+    int constexpr expected[] = {17, 50, 17};
     input_limit_test_read(
       __LINE__, test_files, input, output_limit{0UL}, input_limit{2 * 1024 * 1024UL}, expected);
   }
@@ -1169,7 +1169,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, ListType)
   input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows);

   {
-    int constexpr expected[] = {2, 40, 3};
+    int constexpr expected[] = {3, 40, 3};
     input_limit_test_read(
       __LINE__, test_files, input, output_limit{0UL}, input_limit{5 * 1024 * 1024UL}, expected);
   }
@@ -1252,13 +1252,13 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList)
   input_limit_test_write(test_files, input, cudf::io::default_stripe_size_rows);

   {
-    int constexpr expected[] = {8, 8, 6};
+    int constexpr expected[] = {13, 8, 6};
     input_limit_test_read(
       __LINE__, test_files, input, output_limit{0UL}, input_limit{128 * 1024 * 1024UL}, expected);
   }

   {
-    int constexpr expected[] = {16, 15, 17};
+    int constexpr expected[] = {13, 15, 17};
     input_limit_test_read(__LINE__,
                           test_files,
                           input,

From cb41a6525840a84dfbf97aca78dabd7ee6e7e6c2 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 15 Mar 2024 10:07:53 -0700
Subject: [PATCH 245/321] Cleanup

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu          | 127 ++++---------------
 cpp/src/io/orc/reader_impl.hpp         |  48 +++-------
 cpp/src/io/orc/reader_impl_chunking.cu |  58 +++++------
 3 files changed, 53 insertions(+), 180 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 82e1b1220ba..140e4517862 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -14,45 +14,13 @@
  * limitations under the License.
  */

-// TODO: remove
-#include
-
-//
-//
-//
-#include "io/comp/gpuinflate.hpp"
-#include "io/comp/nvcomp_adapter.hpp"
 #include "io/orc/reader_impl.hpp"
 #include "io/orc/reader_impl_chunking.hpp"
 #include "io/orc/reader_impl_helpers.hpp"
-#include "io/utilities/config_utils.hpp"

 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
-#include

 namespace cudf::io::orc::detail {

@@ -61,55 +29,33 @@ void reader::impl::prepare_data(read_mode mode)
   // There are no columns in the table.
   if (_selected_columns.num_levels() == 0) { return; }

-#ifdef LOCAL_TEST
-  std::cout << "call global, skip = " << _config.skip_rows << std::endl;
-#endif
-
+  // This will be no-op if it was called before.
   global_preprocess(mode);

   if (!_chunk_read_data.more_table_chunk_to_output()) {
     if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) {
-#ifdef LOCAL_TEST
-      printf("load more data\n\n");
-#endif
-
+      // Only load stripe data if:
+      // - There is more stripe to load, and
+      // - All loaded stripes were decoded, and
+      // - All the decoded results were output.
       load_data();
     }
-
     if (_chunk_read_data.more_stripe_to_decode()) {
-#ifdef LOCAL_TEST
-      printf("decode more data\n\n");
-#endif
-
+      // Only decompress/decode the loaded stripes if:
+      // - There are loaded stripes that were not decoded yet, and
+      // - All the decoded results were output.
       decompress_and_decode();
     }
   }
-
-#ifdef LOCAL_TEST
-  printf("done load and decode data\n\n");
-#endif
 }

 table_with_metadata reader::impl::make_output_chunk()
 {
-#ifdef LOCAL_TEST
-  {
-    _stream.synchronize();
-    auto peak_mem = mem_stats_logger.peak_memory_usage();
-    std::cout << "start to make out, peak_memory_usage: " << peak_mem << "("
-              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
-  }
-#endif
-
   // There is no columns in the table.
   if (_selected_columns.num_levels() == 0) { return {std::make_unique<table>(), table_metadata{}}; }

-  // If no rows or stripes to read, return empty columns
+  // If no rows or stripes to read, return empty columns.
   if (!_chunk_read_data.more_table_chunk_to_output()) {
-#ifdef LOCAL_TEST
-    printf("has no next\n");
-#endif
-
     std::vector> out_columns;
     auto out_metadata = get_meta_with_user_data();
     std::transform(_selected_columns.levels[0].begin(),
@@ -128,43 +74,23 @@ table_with_metadata reader::impl::make_output_chunk()
     return {std::make_unique<table>(std::move(out_columns)), std::move(out_metadata)};
   }

-  auto out_table = [&] {
+  auto const make_output_table = [&] {
     if (_chunk_read_data.output_table_ranges.size() == 1) {
-      // Must change the index of output range, so calling `has_next()` after that
-      // can return the correct answer.
+      // Must change the index of the current output range such that calling `has_next()` after
+      // this will return the correct answer (`false`, since there is only one range).
       _chunk_read_data.curr_output_table_range++;
-#ifdef LOCAL_TEST
-      printf("one chunk, no more table---------------------------------\n");
-#endif
-      // If there is no slicing, just hand over the decoded table.
-      return std::move(_chunk_read_data.decoded_table);
-    }
-
-#ifdef LOCAL_TEST
-    {
-      _stream.synchronize();
-      auto peak_mem = mem_stats_logger.peak_memory_usage();
-      std::cout << "prepare to make out, peak_memory_usage: " << peak_mem << "("
-                << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
+      // Just hand over the decoded table without slicing.
+      return std::move(_chunk_read_data.decoded_table);
     }
-#endif

+    // The range of rows in the decoded table to output.
     auto const out_range =
       _chunk_read_data.output_table_ranges[_chunk_read_data.curr_output_table_range++];
     auto const out_tview = cudf::detail::slice(
       _chunk_read_data.decoded_table->view(),
       {static_cast(out_range.begin), static_cast(out_range.end)},
       _stream)[0];
-
     auto output = std::make_unique<table>(out_tview, _stream, _mr);

     // If this is the last slice, we also delete the decoded table to free up memory.
     if (!_chunk_read_data.more_table_chunk_to_output()) {
       _chunk_read_data.decoded_table.reset(nullptr);
     }

     return output;
-  }();
-
-#ifdef LOCAL_TEST
-  if (!_chunk_read_data.has_next()) {
-    static int count{0};
-    count++;
-    _stream.synchronize();
-    auto peak_mem = mem_stats_logger.peak_memory_usage();
-    std::cout << "complete, " << count << ", peak_memory_usage: " << peak_mem
-              << " , MB = " << (peak_mem * 1.0) / (1024.0 * 1024.0) << std::endl;
-  } else {
-    _stream.synchronize();
-    auto peak_mem = mem_stats_logger.peak_memory_usage();
-    std::cout << "done, partial, peak_memory_usage: " << peak_mem
-              << " , MB = " << (peak_mem * 1.0) / (1024.0 * 1024.0) << std::endl;
-  }
-#endif
+  };

-  return {std::move(out_table), _out_metadata};
+  return {make_output_table(), table_metadata{_out_metadata} /*copy cached metadata*/};
 }

 table_metadata reader::impl::get_meta_with_user_data()
@@ -256,9 +166,6 @@ reader::impl::impl(std::size_t output_size_limit,
                    rmm::mr::device_memory_resource* mr)
   : _stream(stream),
     _mr(mr),
-#ifdef LOCAL_TEST
-    mem_stats_logger(mr),
-#endif
     _config{options.get_timestamp_type(),
             options.is_enabled_use_index(),
             options.is_enabled_use_np_dtypes(),
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 2173b90d30a..45d60acb3db 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -24,7 +24,6 @@
 #include
 #include
-#include

 // TODO: remove
 #include
@@ -34,35 +33,6 @@

 namespace cudf::io::orc::detail {

-#ifdef LOCAL_TEST
-class memory_stats_logger {
- public:
-  explicit memory_stats_logger(rmm::mr::device_memory_resource* mr) : existing_mr(mr)
-  {
-    printf("exist mr: %p\n", mr);
-
-    statistics_mr =
-      std::make_unique>(
-        existing_mr);
-
-    rmm::mr::set_current_device_resource(statistics_mr.get());
-  }
-
-  ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); }
-
-  [[nodiscard]] size_t peak_memory_usage() const noexcept
-  {
-    return statistics_mr->get_bytes_counter().peak;
-  }
-
- private:
-  rmm::mr::device_memory_resource* existing_mr;
-  static inline std::unique_ptr<
-    rmm::mr::statistics_resource_adaptor>
-    statistics_mr;
-};
-#endif
-
 struct reader_column_meta;

 /**
@@ -73,6 +43,9 @@ class reader::impl {
   /**
    * @brief Constructor from a dataset source with reader options.
    *
+   * This constructor will call the other constructor with `output_size_limit` and `data_read_limit`
+   * set to `0` and `output_row_granularity` set to `DEFAULT_OUTPUT_ROW_GRANULARITY`.
+   *
    * @param sources Dataset sources
    * @param options Settings for controlling reading behavior
    * @param stream CUDA stream used for device memory operations and kernel launches
@@ -84,7 +57,8 @@ class reader::impl {
                 rmm::mr::device_memory_resource* mr);

   /**
-   * @copydoc cudf::io::orc::detail::chunked_reader
+   * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t,
+   * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
    */
   explicit impl(std::size_t output_size_limit,
                 std::size_t data_read_limit,
@@ -93,6 +67,10 @@ class reader::impl {
                 rmm::cuda_stream_view stream,
                 rmm::mr::device_memory_resource* mr);

+  /**
+   * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t,
+   * size_type, orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
+   */
   explicit impl(std::size_t output_size_limit,
                 std::size_t data_read_limit,
                 size_type output_row_granularity,
@@ -140,7 +118,7 @@ class reader::impl {
    * data streams of the selected columns in all stripes are generated. If the reader has a data
    * read limit, sizes of these streams are used to split the list of all stripes into multiple
    * subsets, each of which will be read into memory in the `load_data()` step. These subsets are
-   * computed such that memory usage will be capped around a fixed size limit.
+   * computed such that memory usage will be kept to be around a fixed size limit.
    *
    * @param mode Value indicating if the data sources are read all at once or chunk by chunk
    */
@@ -184,10 +162,6 @@ class reader::impl {
   rmm::cuda_stream_view const _stream;
   rmm::mr::device_memory_resource* const _mr;

-#ifdef LOCAL_TEST
-  memory_stats_logger mem_stats_logger;
-#endif
-
   // Reader configs.
   struct {
     data_type timestamp_type;  // override output timestamp resolution
@@ -215,7 +189,7 @@ class reader::impl {
   std::vector> _out_buffers;

   // The default value used for subdividing the decoded table for final output.
-  static constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000;
+  static inline constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000;
 };

 }  // namespace cudf::io::orc::detail
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index f58b702ddac..5635caa58ec 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -188,8 +188,8 @@ std::vector find_splits(host_span cumulative_sizes,
   auto const end = start + cumulative_sizes.size();

   while (cur_count < total_count) {
-    int64_t split_pos =
-      thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit));
+    int64_t split_pos = static_cast(
+      thrust::distance(start, thrust::lower_bound(thrust::seq, start + cur_pos, end, size_limit)));

     // If we're past the end, or if the returned range has size exceeds the given size limit,
     // move back one position.
@@ -208,9 +208,9 @@ std::vector find_splits(host_span cumulative_sizes,
       }
     }

-    // In case we have moved back too in the steps above, far beyond the last split point: that
-    // means we cannot find any range that has size fits within the given size limit.
-    // In such case, we need to move forward until we move pass the last output range.
+    // In case we have moved back too much in the steps above, far beyond the last split point, that
+    // means we could not find any range that has size fits within the given size limit.
+    // In such situations, we need to move forward until we move past the last output range.
     while (split_pos < (static_cast(cumulative_sizes.size()) - 1) &&
            (split_pos < 0 || cumulative_sizes[split_pos].count <= cur_count)) {
       split_pos++;
@@ -227,7 +227,7 @@ std::vector find_splits(host_span cumulative_sizes,
     }
   }

-  // If the last range has size smaller than `merge_threshold` percent of the second last one,
+  // If the last range has size smaller than `merge_threshold` times the size of the second last one,
   // merge it with the second last one.
   // This is to prevent having too small trailing range.
   if (splits.size() > 1) {
@@ -243,6 +243,8 @@ std::vector find_splits(host_span cumulative_sizes,
   return splits;
 }

+// Since `find_splits` is a template function, we need to explicitly instantiate it so it can be
+// used outside of this TU.
 template std::vector find_splits(host_span sizes,
                                         std::size_t total_count,
                                         std::size_t size_limit);
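
The splitting strategy patched above, in isolation: given prefix-summed sizes, repeatedly binary-search for the largest prefix that still fits the limit and cut there, always making progress even when a single element exceeds the limit. A simplified std-only sketch of the idea (it omits the trailing-range merge and the {count, size} pairing of the real `find_splits`):

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Split inclusive prefix sums into half-open index ranges of at most `limit` bytes each.
    std::vector<std::pair<std::size_t, std::size_t>> split(
      std::vector<std::size_t> const& prefix_sums, std::size_t limit)
    {
      std::vector<std::pair<std::size_t, std::size_t>> ranges;
      std::size_t begin = 0, base = 0;
      while (begin < prefix_sums.size()) {
        // First element whose prefix exceeds the budget for this range.
        auto const it = std::upper_bound(prefix_sums.begin() + begin, prefix_sums.end(), base + limit);
        // Always take at least one element so an oversized element still forms its own range.
        auto const end =
          std::max<std::size_t>(begin + 1, static_cast<std::size_t>(it - prefix_sums.begin()));
        ranges.emplace_back(begin, end);
        base  = prefix_sums[end - 1];
        begin = end;
      }
      return ranges;
    }
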
@@ -264,7 +266,9 @@ void reader::impl::global_preprocess(read_mode mode)
   if (_file_itm_data.global_preprocessed) { return; }
   _file_itm_data.global_preprocessed = true;

-  // Load stripes's metadata.
+  //
+  // Load stripes' metadata:
+  //
   std::tie(
     _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) =
     _metadata.select_stripes(
@@ -274,30 +278,15 @@ void reader::impl::global_preprocess(read_mode mode)
   CUDF_EXPECTS(
     mode == read_mode::CHUNKED_READ ||
       _file_itm_data.rows_to_read <= static_cast(std::numeric_limits::max()),
-    "Number or rows to read exceeds the column size limit in READ_ALL mode.",
+    "READ_ALL mode does not support reading number of rows more than cudf's column size limit.",
     std::overflow_error);

-#ifdef LOCAL_TEST
-  {
-    auto const skip_rows    = _config.skip_rows;
-    auto const num_rows_opt = _config.num_read_rows;
-    printf("input skip rows: %ld, num rows: %ld\n", skip_rows, num_rows_opt.value_or(-1l));
-    printf("actual skip rows: %ld, num rows: %ld\n",
-           _file_itm_data.rows_to_skip,
-           _file_itm_data.rows_to_read);
-  }
-#endif
-
   auto const& selected_stripes = _file_itm_data.selected_stripes;
   auto const num_total_stripes = selected_stripes.size();
   auto const num_levels        = _selected_columns.num_levels();

-#ifdef LOCAL_TEST
-  printf("num load stripe: %d\n", (int)num_total_stripes);
-#endif
-
   //
-  // Pre allocate necessary memory for data processed in the next steps:
+  // Pre allocate necessary memory for data processed in the other reading steps:
   //
   auto& stripe_data_read_ranges = _file_itm_data.stripe_data_read_ranges;
   stripe_data_read_ranges.resize(num_total_stripes);
@@ -320,6 +309,10 @@ void reader::impl::global_preprocess(read_mode mode)
   auto& read_info = _file_itm_data.data_read_info;
   auto& col_meta  = *_col_meta;

+  //
+  // Collect columns' types.
+  //
+
   for (std::size_t level = 0; level < num_levels; ++level) {
     lvl_stripe_sizes[level].resize(num_total_stripes);
     lvl_stripe_stream_ranges[level].resize(num_total_stripes);
@@ -372,10 +365,10 @@ void reader::impl::global_preprocess(read_mode mode)
   }

   //
-  // Load all stripes' metadata.
+  // Collect all data streams' information:
   //

-  // Collect total data size for all data streams in each stripe.
+  // Accumulate data size for data streams in each stripe.
   cudf::detail::hostdevice_vector total_stripe_sizes(num_total_stripes, _stream);

   for (std::size_t stripe_global_idx = 0; stripe_global_idx < num_total_stripes;
@@ -384,11 +377,10 @@ void reader::impl::global_preprocess(read_mode mode)
     auto const stripe_info   = stripe.stripe_info;
     auto const stripe_footer = stripe.stripe_footer;

-    std::size_t stripe_size{0};
+    std::size_t this_stripe_size{0};
     auto const last_read_size = read_info.size();
     for (std::size_t level = 0; level < num_levels; ++level) {
-      auto& stream_info  = _file_itm_data.lvl_stream_info[level];
-      auto& stripe_sizes = lvl_stripe_sizes[level];
+      auto& stream_info = _file_itm_data.lvl_stream_info[level];

       auto stream_level_count = stream_info.size();
       auto const stripe_level_size =
@@ -401,7 +393,7 @@ void reader::impl::global_preprocess(read_mode mode)
           false,  // use_index,
           level == 0,
           nullptr,  // num_dictionary_entries
          nullptr,  // local_stream_order
           &stream_info,
           std::nullopt  // chunks
         );
@@ -410,8 +402,8 @@ void reader::impl::global_preprocess(read_mode mode)
       CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0,
                    "Invalid index rowgroup stream data");

-      stripe_sizes[stripe_global_idx] = stripe_level_size;
-      stripe_size += stripe_level_size;
+      lvl_stripe_sizes[level][stripe_global_idx] = stripe_level_size;
+      this_stripe_size += stripe_level_size;

+      // Range of the streams in `stream_info` corresponding to this stripe at the current level.
       lvl_stripe_stream_ranges[level][stripe_global_idx] =
         range{stream_level_count, stream_info.size()};

-      // Coalesce consecutive streams into one read
+      // Coalesce consecutive streams into one read.
       while (not is_stripe_data_empty and stream_level_count < stream_info.size()) {
         auto const d_dst  = stream_info[stream_level_count].dst_pos;
         auto const offset = stream_info[stream_level_count].offset;
@@ -422,8 +423,11 @@ void reader::impl::global_preprocess(read_mode mode)
         }
         read_info.emplace_back(offset, d_dst, len, stripe.source_idx, stripe_global_idx, level);
       }
-    }
-    total_stripe_sizes[stripe_global_idx] = {1, stripe_size};
+    }  // end loop level
+
+    total_stripe_sizes[stripe_global_idx] = {1, this_stripe_size};
+
+    // Range of all stream reads in `read_info` corresponding to this stripe, in all levels.
     stripe_data_read_ranges[stripe_global_idx] = range{last_read_size, read_info.size()};
   }
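
The `range{begin, end}` bookkeeping above flattens per-stripe items (streams, reads) into one vector and records, for each stripe, the half-open index range of its items; selecting several consecutive stripes then reduces to merging the first and last ranges. A sketch of what the reader's `get_range()` helper computes, assuming the per-stripe ranges are consecutive:

    #include <cstddef>
    #include <vector>

    struct range { std::size_t begin, end; };  // half-open [begin, end)

    // Items belonging to stripes [selected.begin, selected.end) in the flattened vector.
    range get_range_sketch(std::vector<range> const& per_stripe, range selected)
    {
      return range{per_stripe[selected.begin].begin, per_stripe[selected.end - 1].end};
    }
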
stripe_data_read_ranges[stripe_global_idx] = range{last_read_size, read_info.size()}; } @@ -433,26 +437,12 @@ void reader::impl::global_preprocess(read_mode mode) _chunk_read_data.curr_load_stripe_range = 0; - // Load all chunks if there is no read limit. + // Load all stripes if there is no read limit. if (_chunk_read_data.data_read_limit == 0) { -#ifdef LOCAL_TEST - printf("0 limit: output load stripe chunk = 0, %d\n", (int)num_total_stripes); -#endif - - _chunk_read_data.load_stripe_ranges = {range{0ul, num_total_stripes}}; + _chunk_read_data.load_stripe_ranges = {range{0UL, num_total_stripes}}; return; } -#ifdef LOCAL_TEST - printf("total stripe sizes:\n"); - int count{0}; - for (auto& size : total_stripe_sizes) { - ++count; - printf("size: %ld, %zu\n", size.count, size.size_bytes); - if (count > 5) break; - } -#endif - // TODO: exec_policy_nosync // Compute the prefix sum of stripes' data sizes. total_stripe_sizes.host_to_device_async(_stream); @@ -461,36 +451,17 @@ void reader::impl::global_preprocess(read_mode mode) total_stripe_sizes.d_end(), total_stripe_sizes.d_begin(), cumulative_size_sum{}); - total_stripe_sizes.device_to_host_sync(_stream); -#ifdef LOCAL_TEST - count = 0; - printf("prefix sum total stripe sizes:\n"); - for (auto& size : total_stripe_sizes) { - ++count; - printf("size: %ld, %zu\n", size.count, size.size_bytes); - if (count > 5) break; - } -#endif - auto const load_limit = [&] { auto const tmp = static_cast(_chunk_read_data.data_read_limit * chunk_read_data::load_limit_ratio); - // Make sure not to pass 0 byte limit (due to round-off) to compute splits. + // Make sure not to pass 0 byte limit (due to round-off) to `find_splits`. return tmp > 0UL ? tmp : 1UL; }(); + _chunk_read_data.load_stripe_ranges = find_splits(total_stripe_sizes, num_total_stripes, load_limit); - -#ifdef LOCAL_TEST - auto& splits = _chunk_read_data.load_stripe_ranges; - printf("------------\nSplits (/total num stripe = %d): \n", (int)num_total_stripes); - for (size_t idx = 0; idx < splits.size(); idx++) { - printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end); - } - fflush(stdout); -#endif } // Load each chunk from `load_stripe_chunks`. From d68562c9a7ed1a9cb16f2bba28434ccb36c6185a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Mar 2024 11:35:12 -0700 Subject: [PATCH 247/321] Fix a bug in stripe rows computation Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 39 ++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index da7eacf24ed..ca6f64b94e4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -464,7 +464,6 @@ void reader::impl::global_preprocess(read_mode mode) find_splits(total_stripe_sizes, num_total_stripes, load_limit); } -// Load each chunk from `load_stripe_chunks`. void reader::impl::load_data() { if (_file_itm_data.has_no_data()) { return; } @@ -475,16 +474,11 @@ void reader::impl::load_data() auto const stripe_end = load_stripe_range.end; auto const stripe_count = stripe_end - stripe_start; - auto const num_levels = _selected_columns.num_levels(); - -#ifdef LOCAL_TEST - printf("\n\nloading data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); -#endif - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto const num_levels = _selected_columns.num_levels(); // Prepare the buffer to read raw data onto. 
- for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + for (std::size_t level = 0; level < num_levels; ++level) { auto& stripe_data = lvl_stripe_data[level]; stripe_data.resize(stripe_count); @@ -499,12 +493,12 @@ void reader::impl::load_data() // Load stripe data into memory: // - // After loading data from sources into host buffers, we need to transfer (async) data to device. - // Such host buffers need to be kept alive until we sync device. + // If we load data from sources into host buffers, we need to transfer (async) data to device + // memory. Such host buffers need to be kept alive until we sync the transfers. std::vector> host_read_buffers; - // If we load data directly from sources into device, we also need to the entire read tasks. - // Thus, we need to keep all read tasks alive and sync all together. + // If we load data directly from sources into device, the loads are also async. + // Thus, we need to make sure to sync all them at the end. std::vector, std::size_t>> read_tasks; auto const [read_begin, read_end] = @@ -543,7 +537,7 @@ void reader::impl::load_data() // Split list of all stripes into subsets that be loaded separately without blowing up memory: // - // A map from stripe source into `CompressedStreamInfo*` pointer. + // A map from a stripe sources into `CompressedStreamInfo*` pointers. // These pointers are then used to retrieve stripe/level decompressed sizes for later // decompression and decoding. stream_source_map stream_compinfo_map; @@ -551,17 +545,20 @@ void reader::impl::load_data() // For estimating the decompressed sizes of the loaded stripes. cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); - std::size_t num_loaded_stripes{0}; - for (std::size_t stripe_idx = 0; stripe_idx < stripe_count; ++stripe_idx) { - auto const& stripe = _file_itm_data.selected_stripes[stripe_idx]; - auto const stripe_info = stripe.stripe_info; - stripe_decomp_sizes[stripe_idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; - num_loaded_stripes += stripe_info->numberOfRows; + + // Number of rows in the loading stripes. + std::size_t num_loading_rows{0}; + + for (std::size_t idx = 0; idx < stripe_count; ++idx) { + auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; + auto const stripe_info = stripe.stripe_info; + stripe_decomp_sizes[idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; + num_loading_rows += stripe_info->numberOfRows; } auto& compinfo_map = _file_itm_data.compinfo_map; - for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { + for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -661,7 +658,7 @@ void reader::impl::load_data() if (_chunk_read_data.data_read_limit == 0 && // In addition to not have any read limit, we also need to check if the the total number of // rows in the loaded stripes exceeds column size limit. 
- num_loaded_stripes < static_cast(std::numeric_limits::max())) { + num_loading_rows < static_cast(std::numeric_limits::max())) { #ifdef LOCAL_TEST printf("0 limit: output decode stripe chunk unchanged\n"); #endif From 86ec4367951749db57ac23de18e33f56ec18eaa4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Mar 2024 12:06:32 -0700 Subject: [PATCH 248/321] Cleanup `reader_impl_chunking.cu` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 169 +++++------------------- cpp/src/io/orc/reader_impl_chunking.hpp | 22 +-- 2 files changed, 47 insertions(+), 144 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index ca6f64b94e4..e5d17204fb9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -15,60 +15,26 @@ */ #include "io/comp/gpuinflate.hpp" -#include "io/comp/nvcomp_adapter.hpp" #include "io/orc/reader_impl.hpp" #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" -#include "io/utilities/config_utils.hpp" -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include -#include -#include -#include - -// -// -// -#include - -#include -// -// -// +#include namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( - std::size_t global_stripe_order, + std::size_t stripe_order, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -124,7 +90,7 @@ std::size_t gather_stream_info_and_column_desc( if (child_idx >= 0) { col = child_idx; if (chunks.has_value()) { - auto& chunk = (*chunks.value())[global_stripe_order][col]; + auto& chunk = (*chunks.value())[stripe_order][col]; chunk.strm_id[gpu::CI_PRESENT] = *local_stream_order; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } @@ -136,7 +102,7 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[global_stripe_order][col]; + auto& chunk = (*chunks.value())[stripe_order][col]; chunk.strm_id[index_type] = *local_stream_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams @@ -157,7 +123,7 @@ std::size_t gather_stream_info_and_column_desc( stripeinfo->offset + src_offset, dst_offset, stream.length, - stream_source_info{global_stripe_order, level, column_id, stream.kind}); + stream_source_info{stripe_order, level, column_id, stream.kind}); } dst_offset += stream.length; @@ -432,9 +398,11 @@ void reader::impl::global_preprocess(read_mode mode) } // - // Split list of all stripes into subsets that be loaded separately without blowing up memory: + // Split range of all stripes into subranges that can be loaded separately without blowing up + // memory: // + // Load range is reset to start from the first position in `load_stripe_ranges`. _chunk_read_data.curr_load_stripe_range = 0; // Load all stripes if there is no read limit. @@ -497,27 +465,28 @@ void reader::impl::load_data() // memory. Such host buffers need to be kept alive until we sync the transfers. 
std::vector> host_read_buffers; - // If we load data directly from sources into device, the loads are also async. + // If we load data directly from sources into device memory, the loads are also async. // Thus, we need to make sure to sync all them at the end. std::vector, std::size_t>> read_tasks; + // Range of the read info (offset, length) to read for the current being loaded stripes. auto const [read_begin, read_end] = get_range(_file_itm_data.stripe_data_read_ranges, load_stripe_range); for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read_info = _file_itm_data.data_read_info[read_idx]; - auto const source = _metadata.per_file_metadata[read_info.source_idx].source; + auto const source_ptr = _metadata.per_file_metadata[read_info.source_idx].source; auto const dst_base = static_cast( lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); - if (source->is_device_read_preferred(read_info.length)) { + if (source_ptr->is_device_read_preferred(read_info.length)) { read_tasks.push_back( - std::pair(source->device_read_async( + std::pair(source_ptr->device_read_async( read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), read_info.length)); } else { - auto buffer = source->host_read(read_info.offset, read_info.length); + auto buffer = source_ptr->host_read(read_info.offset, read_info.length); CUDF_EXPECTS(buffer->size() == read_info.length, "Unexpected discrepancy in bytes read."); CUDF_CUDA_TRY(cudaMemcpyAsync(dst_base + read_info.dst_pos, buffer->data(), @@ -528,13 +497,17 @@ void reader::impl::load_data() } } - if (host_read_buffers.size() > 0) { _stream.synchronize(); } + if (host_read_buffers.size() > 0) { + _stream.synchronize(); + host_read_buffers.clear(); + } for (auto& task : read_tasks) { CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } // - // Split list of all stripes into subsets that be loaded separately without blowing up memory: + // Split range of loaded stripes into subranges that can be decoded separately without blowing up + // memory: // // A map from a stripe sources into `CompressedStreamInfo*` pointers. @@ -565,6 +538,7 @@ void reader::impl::load_data() auto& stripe_data = lvl_stripe_data[level]; if (stripe_data.empty()) { continue; } + // Range of all streams in the loaded stripes. auto const stream_range = get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); auto const num_streams = stream_range.end - stream_range.begin; @@ -572,8 +546,8 @@ void reader::impl::load_data() if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - // Cannot be cached as-is, since this is for streams in a loaded stripe range, while - // the latter decompression/decoding step will use a different stripe range. + // Cannot be cached as-is, since this is for streams in the current loaded stripe range, + // while the decompression/decoding step would probably use just a subrange of it. 
cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { @@ -585,17 +559,6 @@ void reader::impl::load_data() stream_compinfo_map[stream_source_info{ info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = &compinfo.back(); - -#ifdef LOCAL_TEST - printf("collec stream [%d, %d, %d, %d]: dst = %lu, length = %lu\n", - (int)info.source.stripe_idx, - (int)info.source.level, - (int)info.source.orc_col_idx, - (int)info.source.kind, - info.dst_pos, - info.length); - fflush(stdout); -#endif } compinfo.host_to_device_async(_stream); @@ -613,38 +576,17 @@ void reader::impl::load_data() stream_compinfo->max_uncompressed_size}; stripe_decomp_sizes[stream_id.stripe_idx - stripe_start].size_bytes += stream_compinfo->max_uncompressed_size; - -#ifdef LOCAL_TEST - printf("cache info [%d, %d, %d, %d]: %lu | %lu | %lu\n", - (int)stream_id.stripe_idx, - (int)stream_id.level, - (int)stream_id.orc_col_idx, - (int)stream_id.kind, - (size_t)stream_compinfo->num_compressed_blocks, - (size_t)stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size); - fflush(stdout); -#endif } - // Important: must clear this map since the next level will have similar keys. + // Important: must clear this map to reuse the (empty) map for processing the next level. stream_compinfo_map.clear(); - - } else { -#ifdef LOCAL_TEST - printf("no compression \n"); - fflush(stdout); -#endif - - // Set decompression size equal to the input size. + } else { // no decompression + // Set decompression sizes equal to the input sizes. for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; stripe_decomp_sizes[info.source.stripe_idx - stripe_start].size_bytes += info.length; } } - - // printf(" end level %d\n\n", (int)level); - } // end loop level // Decoding range is reset to start from the first position in `decode_stripe_ranges`. @@ -652,34 +594,18 @@ void reader::impl::load_data() // Decode all loaded stripes if there is no read limit. // In theory, we should just decode enough stripes for output one table chunk, instead of - // decoding all stripes like this. + // decoding all stripes like this, for better load-balancing and reduce memory usage. // However, we do not know how many stripes are 'enough' because there is not any simple and - // cheap way to compute the exact decoded sizes of stripes. + // cheap way to compute the exact decoded sizes of stripes without actually decoding them. if (_chunk_read_data.data_read_limit == 0 && - // In addition to not have any read limit, we also need to check if the the total number of + // In addition to read limit, we also need to check if the the total number of // rows in the loaded stripes exceeds column size limit. + // If that is the case, we cannot read all stripes at once. 
num_loading_rows < static_cast<int64_t>(std::numeric_limits<size_type>::max())) {
-#ifdef LOCAL_TEST
-    printf("0 limit: output decode stripe chunk unchanged\n");
-#endif
-
     _chunk_read_data.decode_stripe_ranges = {load_stripe_range};
     return;
   }
 
-#ifdef LOCAL_TEST
-  // TODO: remove
-  if (_chunk_read_data.data_read_limit == 0) { printf("0 limit but size overflow\n"); }
-
-  {
-    int count{0};
-    for (auto& size : stripe_decomp_sizes) {
-      printf("decomp stripe size: %ld, %zu, %zu\n", size.count, size.size_bytes, size.rows);
-      if (count++ > 5) break;
-    }
-  }
-#endif
-
   // TODO: exec_policy_nosync
   // Compute the prefix sum of stripe data sizes and rows.
   stripe_decomp_sizes.host_to_device_async(_stream);
@@ -690,50 +616,27 @@
       cumulative_size_sum{});
   stripe_decomp_sizes.device_to_host_sync(_stream);
 
-#ifdef LOCAL_TEST
-  {
-    int count{0};
-    for (auto& size : stripe_decomp_sizes) {
-      printf(
-        "prefix sum decomp stripe size: %ld, %zu, %zu\n", size.count, size.size_bytes, size.rows);
-      if (count++ > 5) break;
-    }
-  }
-#endif
-
   auto const decode_limit = [&] {
     // In this case, we have no read limit but have to split due to having number of rows in loaded
    // stripes exceeds column size limit. So we will split based on row number, not data size.
    if (_chunk_read_data.data_read_limit == 0) { return std::numeric_limits<std::size_t>::max(); }
 
    // If `data_read_limit` is too small, make sure not to pass 0 byte limit to `find_splits`.
    auto const tmp = static_cast<std::size_t>(_chunk_read_data.data_read_limit *
                                              chunk_read_data::decode_limit_ratio);
    return tmp > 0UL ? tmp : 1UL;
  }();
+
  _chunk_read_data.decode_stripe_ranges =
    find_splits(stripe_decomp_sizes, stripe_count, decode_limit);
 
  // The split ranges always start from zero.
-  // We need to update the ranges to start from `stripe_start` which is covererd by the current
-  // range of loaded stripes.
+  // We need to change these ranges to start from `stripe_start`, so that they become the correct
+  // subranges of the currently loaded stripe range.
  for (auto& range : _chunk_read_data.decode_stripe_ranges) {
    range.begin += stripe_start;
    range.end += stripe_start;
  }
-
-#ifdef LOCAL_TEST
-  auto& splits = _chunk_read_data.decode_stripe_ranges;
-  printf("------------\nSplits decode_stripe_chunks (/%d): \n", (int)stripe_count);
-  for (size_t idx = 0; idx < splits.size(); idx++) {
-    printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end);
-  }
-
-  auto peak_mem = mem_stats_logger.peak_memory_usage();
-  std::cout << "load, peak_memory_usage: " << peak_mem << "("
-            << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
-  fflush(stdout);
-#endif
 }
 
 }  // namespace cudf::io::orc::detail
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 7e1e08e2d91..f77ca173d35 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -298,28 +298,28 @@ range get_range(std::vector<range> const& input_ranges, range const& selected_ra
 * @brief Function that populates descriptors for either individual streams or chunks of column
 * data, but not both.
 *
- * This function is used in the global step, to gather information for streams of all stripes in
- * the data sources (when `stream_info` is present). Later on, it is used again to populate column
- * descriptors (`chunks` is present) during decompression and decoding.
The two steps share
- * most of the execution path thus this function takes mutually exclusive parameters `stream_info`
- * or `chunks` depending on each use case.
+ * This function is first used in the global step, to gather information for streams of all
+ * stripes in the data sources (when `stream_info` is present). Later on, it is used again to
+ * populate column descriptors (`chunks` is present) during decompression and decoding. The two
+ * steps share most of the execution path; thus, this function takes mutually exclusive parameters
+ * `stream_info` or `chunks` depending on each use case.
 *
- * @param global_stripe_order The global index of the current decoding stripe
- * @param level The nested level of the current decoding column
- * @param stripeinfo The pointer to current decoding stripe's information
- * @param stripefooter The pointer to current decoding stripe's footer
+ * @param stripe_order The index of the current stripe, which can be a global index or a local
+ * decoding index
+ * @param level The current processing nested level
+ * @param stripeinfo The pointer to current stripe's information
+ * @param stripefooter The pointer to current stripe's footer
 * @param orc2gdf The mapping from ORC column ids to gdf column ids
 * @param types The schema type
 * @param use_index Whether to use the row index for parsing
 * @param apply_struct_map Indicating if this is the root level
 * @param num_dictionary_entries The number of dictionary entries
- * @param local_stream_order For retrieving 0-based orders of streams in the current decoding step
+ * @param local_stream_order For retrieving 0-based orders of streams in the decoding step
 * @param stream_info The vector of streams' information
 * @param chunks The vector of column descriptors
 * @return The number of bytes in the gathered streams
 */
std::size_t gather_stream_info_and_column_desc(
-  std::size_t global_stripe_order,
+  std::size_t stripe_order,
   std::size_t level,
   orc::StripeInformation const* stripeinfo,
   orc::StripeFooter const* stripefooter,
From 0f78b0d930b0f121b9bb8aa89e88d059ed0d62fb Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 15 Mar 2024 13:00:15 -0700
Subject: [PATCH 249/321] Cleanup `reader_impl_decode.cu`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 332 +++------------------------
 1 file changed, 30 insertions(+), 302 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 82ca39e6e57..2aab392cd6b 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -14,15 +14,6 @@
 * limitations under the License.
 */
 
-// #define PRINT_DEBUG
-
-// TODO: remove
-#include 
-
-#include 
-//
-//
-
 #include "io/comp/gpuinflate.hpp"
 #include "io/comp/nvcomp_adapter.hpp"
 #include "io/orc/reader_impl.hpp"
@@ -36,7 +27,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
 
@@ -45,7 +35,6 @@
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -55,7 +44,7 @@
 #include 
 #include 
 
-#include 
+#include 
 
 namespace cudf::io::orc::detail {
 
@@ -65,7 +54,7 @@ namespace {
 * @brief Decompresses the stripe data, at stream granularity.
 *
 * Only the streams in the provided `stream_range` are decoded. That range is determined in
- * the previous steps, after splitting stripes into subsets to maintain memory usage to be
+ * the previous steps, after splitting stripes into ranges to keep memory usage
 * under data read limit.
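 *
 * As an illustrative scenario: if stripes [0, 10) are currently loaded in memory but this call
 * is given a `stream_range` covering only stripes [3, 5), then just the streams belonging to
 * stripes 3 and 4 are decompressed.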
 *
 * @param loaded_stripe_range Range of stripes that are already loaded in memory
@@ -107,27 +96,14 @@ rmm::device_buffer decompress_stripe_data(
   for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) {
     auto const& info = stream_info[stream_idx];
 
-#ifdef LOCAL_TEST
-//     printf("collec stream again [%d, %d, %d, %d]: dst = %lu, length = %lu\n",
-//            (int)info.source.stripe_idx,
-//            (int)info.source.level,
-//            (int)info.source.orc_col_idx,
-//            (int)info.source.kind,
-//            info.dst_pos,
-//            info.length);
-//     fflush(stdout);
-#endif
-
     compinfo.push_back(gpu::CompressedStreamInfo(
       static_cast<uint8_t const*>(
         stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) +
         info.dst_pos,
       info.length));
-    auto const& cached_comp_info = compinfo_map.at(stream_source_info{
-      info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind});
-    auto& stream_comp_info = compinfo.back();
-
+    auto const& cached_comp_info = compinfo_map.at(info.source);
+    auto& stream_comp_info       = compinfo.back();
     stream_comp_info.num_compressed_blocks   = cached_comp_info.num_compressed_blocks;
     stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks;
     stream_comp_info.max_uncompressed_size   = cached_comp_info.total_decomp_size;
@@ -141,52 +117,12 @@ rmm::device_buffer decompress_stripe_data(
     not((num_uncompressed_blocks + num_compressed_blocks > 0) and (total_decomp_size == 0)),
     "Inconsistent info on compression blocks");
 
-#ifdef XXX
-  std::size_t old_num_compressed_blocks   = num_compressed_blocks;
-  std::size_t old_num_uncompressed_blocks = num_uncompressed_blocks;
-  std::size_t old_total_decomp_size       = total_decomp_size;
-
-  num_compressed_blocks   = 0;
-  num_uncompressed_blocks = 0;
-  total_decomp_size       = 0;
-  for (std::size_t i = 0; i < compinfo.size(); ++i) {
-    num_compressed_blocks += compinfo[i].num_compressed_blocks;
-    num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks;
-    total_decomp_size += compinfo[i].max_uncompressed_size;
-
-    auto const& info = stream_info[i];
-    printf("compute info [%d, %d, %d, %d]: %lu | %lu | %lu\n",
-           (int)info.source.stripe_idx,
-           (int)info.source.level,
-           (int)info.source.orc_cold_idx,
-           (int)info.source.kind,
-           (size_t)compinfo[i].num_compressed_blocks,
-           (size_t)compinfo[i].num_uncompressed_blocks,
-           compinfo[i].max_uncompressed_size);
-    fflush(stdout);
-  }
-
-  if (old_num_compressed_blocks != num_compressed_blocks ||
-      old_num_uncompressed_blocks != num_uncompressed_blocks ||
-      old_total_decomp_size != total_decomp_size) {
-    printf("invalid: %d - %d, %d - %d, %d - %d\n",
-           (int)old_num_compressed_blocks,
-           (int)num_compressed_blocks,
-           (int)old_num_uncompressed_blocks,
-           (int)num_uncompressed_blocks,
-           (int)old_total_decomp_size,
-           (int)total_decomp_size
-
-    );
-  }
-#endif
-
-  // Buffer needs to be padded.
-  // Required by `gpuDecodeOrcColumnData`.
+  // Buffer needs to be padded. This is required by `gpuDecodeOrcColumnData`.
   rmm::device_buffer decomp_data(
     cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream);
 
   // If total_decomp_size is zero, the input data may be just empty.
+  // This is still a valid input, thus do not panic.
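  // (A zero-sized `rmm::device_buffer` is a valid result; the early return below simply hands
  // the empty buffer back to the caller. As a sketch of the padding arithmetic above, assuming
  // BUFFER_PADDING_MULTIPLE is 8: round_up_safe(1000, 8) == 1000, round_up_safe(1001, 8) == 1008.)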
if (decomp_data.is_empty()) { return decomp_data; } rmm::device_uvector> inflate_in( @@ -471,10 +407,6 @@ void decode_stream_data(int64_t num_dicts, auto const num_stripes = chunks.size().first; auto const num_columns = chunks.size().second; -#ifdef LOCAL_TEST - printf("decode %d stripess \n", (int)num_stripes); -#endif - thrust::counting_iterator col_idx_it(0); thrust::counting_iterator stripe_idx_it(0); @@ -495,29 +427,11 @@ void decode_stream_data(int64_t num_dicts, chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream); if (level > 0) { -#ifdef LOCAL_TEST - printf("update_null_mask\n"); -#endif - // Update nullmasks for children if parent was a struct and had null mask update_null_mask(chunks, out_buffers, stream, mr); } rmm::device_scalar error_count(0, stream); - // Update the null map for child columns - - // printf( - // "num col: %d, num stripe: %d, skip row: %d, row_groups size: %d, row index stride: %d, " - // "level: " - // "%d\n", - // (int)num_columns, - // (int)num_stripes, - // (int)skip_rows, - // (int)row_groups.size().first, - // (int)row_index_stride, - // (int)level - // ); - gpu::DecodeOrcColumnData(chunks.base_device_ptr(), global_dict.data(), row_groups, @@ -541,12 +455,6 @@ void decode_stream_data(int64_t num_dicts, stripe_idx_it + num_stripes, 0, [&](auto null_count, auto const stripe_idx) { - // printf( - // "null count: %d => %d\n", (int)stripe_idx, - // (int)chunks[stripe_idx][col_idx].null_count); - // printf("num child rows: %d \n", - // (int)chunks[stripe_idx][col_idx].num_child_rows); - return null_count + chunks[stripe_idx][col_idx].null_count; }); }); @@ -721,17 +629,18 @@ void generate_offsets_for_list(host_span buff_data, rmm::cuda_ } /** - * @brief Find the splits of the input table such that each split range has cumulative size less + * @brief Find the splits of the input table such that each split range of rows has data size less * than a given `size_limit`. * * The parameter `segment_length` is to control the granularity of splits. The output ranges will * always have numbers of rows that are multiple of this value, except the last range that contains * the remaining rows. * - * Similar to `find_splits`, the given limit is just a soft limit. The function will never output + * Similar to `find_splits`, the given limit is just a soft limit. This function will never output * empty ranges, even they have sizes exceed the value of `size_limit`. * * @param input The input table to find splits + * @param segment_length Value to control granularity of the output ranges * @param size_limit A limit on the output size of each split range * @param stream CUDA stream used for device memory operations and kernel launches * @return A vector of ranges as splits of the input @@ -741,13 +650,8 @@ std::vector find_table_splits(table_view const& input, std::size_t size_limit, rmm::cuda_stream_view stream) { -#ifdef LOCAL_TEST - printf("find table split, seg length = %d, limit = %d \n", segment_length, (int)size_limit); -#endif - - // If segment_length is zero: we don't have any limit on granularity. - // As such, set segment length equal to the number of rows. - if (segment_length == 0) { segment_length = input.num_rows(); } + CUDF_EXPECTS(size_limit > 0, "Invalid size limit"); + CUDF_EXPECTS(segment_length > 0, "Invalid segment_length"); // `segmented_row_bit_count` requires that `segment_length` is not larger than number of rows. 
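  // For illustration, assuming an input of 1'000'000 rows and a segment_length of 10'000: the
  // table is measured as 100 segments, so every split boundary found below lands on a multiple
  // of 10'000 rows, except possibly the last range, which takes the remainder.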
segment_length = std::min(segment_length, input.num_rows()); @@ -776,20 +680,6 @@ std::vector find_table_splits(table_view const& input, static_cast(size)}; }); -#ifdef LOCAL_TEST - { - int count{0}; - // TODO: remove: - segmented_sizes.device_to_host_sync(stream); - printf("total row sizes by segment = %d:\n", (int)segment_length); - for (auto& size : segmented_sizes) { - printf("size: %ld, %zu\n", size.count, size.size_bytes / CHAR_BIT); - if (count > 5) break; - ++count; - } - } -#endif - // TODO: exec_policy_nosync thrust::inclusive_scan(rmm::exec_policy(stream), segmented_sizes.d_begin(), @@ -808,6 +698,8 @@ void reader::impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } + CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "There is not any stripe loaded."); + auto const stripe_range = _chunk_read_data.decode_stripe_ranges[_chunk_read_data.curr_decode_stripe_range++]; auto const stripe_start = stripe_range.begin; @@ -815,16 +707,10 @@ void reader::impl::decompress_and_decode() auto const stripe_count = stripe_range.end - stripe_range.begin; // The start index of loaded stripes. They are different from decoding stripes. - CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "There is not any stripe loaded."); auto const load_stripe_range = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1]; auto const load_stripe_start = load_stripe_range.begin; -#ifdef LOCAL_TEST - printf("\ndecoding data from stripe %d -> %d\n", (int)stripe_start, (int)stripe_end); - printf("\n loaded stripe start %d \n", (int)load_stripe_start); -#endif - auto const rows_to_skip = _file_itm_data.rows_to_skip; auto const& selected_stripes = _file_itm_data.selected_stripes; @@ -834,13 +720,6 @@ void reader::impl::decompress_and_decode() auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_rows = static_cast(stripe.stripe_info->numberOfRows); rows_to_decode += stripe_rows; - - // The rows to skip should never be larger than number of rows in the first loaded stripes. - // Technically, overflow here should never happen since `select_stripes` already checked it. - // This is just to make sure there was not any bug there. - if (rows_to_skip > 0) { - CUDF_EXPECTS(rows_to_skip < stripe_rows, "Invalid rows_to_skip computation."); - } } CUDF_EXPECTS(rows_to_decode > rows_to_skip, "Invalid rows_to_decode computation."); @@ -851,18 +730,15 @@ void reader::impl::decompress_and_decode() _file_itm_data.rows_to_skip = 0; _file_itm_data.rows_to_read -= rows_to_decode; -#ifdef LOCAL_TEST - printf("decode, skip = %ld, decode = %ld\n", rows_to_skip, rows_to_decode); -#endif - // Technically, overflow here should never happen because the `load_data()` step // already handled it by splitting the loaded stripe range into multiple decode ranges. 
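  // (cudf::size_type is a 32-bit signed integer, so a single decode range can hold at most
  // 2'147'483'647 rows; the check below is a safety net in case that splitting logic regresses.)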
CUDF_EXPECTS(rows_to_decode <= static_cast(std::numeric_limits::max()), "Number or rows to decode exceeds the column size limit.", std::overflow_error); + // TODO: move this to global process // Set up table for converting timestamp columns from local to UTC time - auto const tz_table = [&, &selected_stripes = selected_stripes] { + auto const tz_table = [&, &writerTimezone = selected_stripes[0].stripe_footer->writerTimezone] { auto const has_timestamp_column = std::any_of( _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { @@ -870,9 +746,9 @@ void reader::impl::decompress_and_decode() }); }); - return has_timestamp_column ? cudf::detail::make_timezone_transition_table( - {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) - : std::make_unique(); + return has_timestamp_column + ? cudf::detail::make_timezone_transition_table({}, writerTimezone, _stream) + : std::make_unique(); }(); auto const tz_table_dptr = table_device_view::create(tz_table->view(), _stream); @@ -888,17 +764,6 @@ void reader::impl::decompress_and_decode() auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { -#ifdef LOCAL_TEST - printf("processing level = %d\n", (int)level); - - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); @@ -915,15 +780,6 @@ void reader::impl::decompress_and_decode() cudf::detail::hostdevice_2dvector(stripe_count, num_level_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - const bool use_index = _config.use_index && // Do stripes have row group index @@ -937,10 +793,6 @@ void reader::impl::decompress_and_decode() // TODO: Fix logic to handle unaligned rows (rows_to_skip == 0); -#ifdef LOCAL_TEST - printf(" use_index: %d\n", (int)use_index); -#endif - null_count_prefix_sums[level].reserve(num_level_columns); std::generate_n(std::back_inserter(null_count_prefix_sums[level]), num_level_columns, [&]() { return cudf::detail::make_zeroed_device_uvector_async( @@ -954,16 +806,11 @@ void reader::impl::decompress_and_decode() std::size_t local_stream_order{0}; for (auto stripe_idx = stripe_start; stripe_idx < stripe_end; ++stripe_idx) { -#ifdef LOCAL_TEST - printf("processing stripe_idx = %d\n", (int)stripe_idx); -#endif - auto const& stripe = selected_stripes[stripe_idx]; auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; - // Gather only for the decoding stripes, thus the first parameter (`global_stripe_order`) - // needs to be normalized to 0-based. + // The first parameter (`stripe_order`) must be normalized to 0-based. 
auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, level, stripe_info, @@ -978,27 +825,19 @@ void reader::impl::decompress_and_decode() &chunks); auto const is_stripe_data_empty = total_data_size == 0; -#ifdef LOCAL_TEST - printf("is_stripe_data_empty: %d\n", (int)is_stripe_data_empty); -#endif - CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); auto const dst_base = static_cast(stripe_data[stripe_idx - load_stripe_start].data()); - auto const num_rows_per_stripe = static_cast(stripe_info->numberOfRows); + auto const num_rows_in_stripe = static_cast(stripe_info->numberOfRows); uint32_t const rowgroup_id = num_rowgroups; uint32_t const stripe_num_rowgroups = - use_index ? (num_rows_per_stripe + _metadata.get_row_index_stride() - 1) / + use_index ? (num_rows_in_stripe + _metadata.get_row_index_stride() - 1) / _metadata.get_row_index_stride() : 0; -#ifdef LOCAL_TEST - printf(" num_rows_per_stripe : %d\n", (int)num_rows_per_stripe); -#endif - // Update chunks to reference streams pointers. for (std::size_t col_idx = 0; col_idx < num_level_columns; col_idx++) { auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; @@ -1010,7 +849,7 @@ void reader::impl::decompress_and_decode() : col_meta.child_start_row[(stripe_idx - stripe_start) * num_level_columns + col_idx]; chunk.num_rows = (level == 0) - ? num_rows_per_stripe + ? num_rows_in_stripe : col_meta.num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_level_columns + col_idx]; chunk.column_num_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[col_idx]; @@ -1052,7 +891,7 @@ void reader::impl::decompress_and_decode() } } - stripe_start_row += num_rows_per_stripe; + stripe_start_row += num_rows_in_stripe; num_rowgroups += stripe_num_rowgroups; } @@ -1080,15 +919,6 @@ void reader::impl::decompress_and_decode() // Setup row group descriptors if using indexes. 
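    // (For scale, assuming the ORC default row index stride of 10'000 rows: a stripe of
    // 1'000'000 rows is described by 100 row groups, so the row group descriptors below hold
    // one entry per row group per column at this nesting level.)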
if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - auto decomp_data = decompress_stripe_data(load_stripe_range, stream_range, stripe_count, @@ -1108,15 +938,6 @@ void reader::impl::decompress_and_decode() stripe_data[i + stripe_start - load_stripe_start] = {}; } -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - } else { if (row_groups.size().first) { chunks.host_to_device_async(_stream); @@ -1133,33 +954,12 @@ void reader::impl::decompress_and_decode() } } -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - - _out_buffers[level].clear(); - -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif + _out_buffers[level].resize(0); for (std::size_t i = 0; i < column_types.size(); ++i) { bool is_nullable = false; for (std::size_t j = 0; j < stripe_count; ++j) { if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { -#ifdef LOCAL_TEST - printf(" is nullable\n"); -#endif is_nullable = true; break; } @@ -1168,39 +968,11 @@ void reader::impl::decompress_and_decode() auto const is_list_type = (column_types[i].id() == type_id::LIST); auto const n_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[i]; -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - // For list column, offset column will be always size + 1. _out_buffers[level].emplace_back( column_types[i], is_list_type ? 
n_rows + 1 : n_rows, is_nullable, _stream, _mr); - -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", buffer size: " << n_rows - << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif } -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - decode_stream_data(num_dict_entries, rows_to_skip, _metadata.get_row_index_stride(), @@ -1212,20 +984,7 @@ void reader::impl::decompress_and_decode() _stream, _mr); -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - if (nested_cols.size()) { -#ifdef LOCAL_TEST - printf("have nested col\n"); -#endif - // Extract information to process nested child columns. scan_null_counts(chunks, null_count_prefix_sums[level], _stream); @@ -1247,15 +1006,6 @@ void reader::impl::decompress_and_decode() } } // end loop level -#ifdef LOCAL_TEST - { - _stream.synchronize(); - auto peak_mem = mem_stats_logger.peak_memory_usage(); - std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "(" - << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl; - } -#endif - // Now generate a table from the decoded result. std::vector> out_columns; _out_metadata = get_meta_with_user_data(); @@ -1271,9 +1021,9 @@ void reader::impl::decompress_and_decode() }); _chunk_read_data.decoded_table = std::make_unique
(std::move(out_columns));
 
-  // Free up memory.
+  // Free up temp memory used for decoding.
   for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) {
-    _out_buffers[level].clear();
+    _out_buffers[level].resize(0);
 
     auto& stripe_data = _file_itm_data.lvl_stripe_data[level];
 
     if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
@@ -1285,16 +1035,11 @@ void reader::impl::decompress_and_decode()
     }
   }
 
-#ifdef LOCAL_TEST
-  {
-    _stream.synchronize();
-    auto peak_mem = mem_stats_logger.peak_memory_usage();
-    std::cout << __LINE__ << ", decomp and decode, peak_memory_usage: " << peak_mem << "("
-              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
-  }
-#endif
-
+  // Output table range is reset to start from the first position.
   _chunk_read_data.curr_output_table_range = 0;
+
+  // Split the decoded table into ranges that can be output as chunks whose sizes are within the
+  // given output size limit.
   _chunk_read_data.output_table_ranges =
     _chunk_read_data.output_size_limit == 0
       ? std::vector<range>{range{
          0, _chunk_read_data.decoded_table->num_rows()}}
      : find_table_splits(_chunk_read_data.decoded_table->view(),
                          _chunk_read_data.output_row_granularity,
                          _chunk_read_data.output_size_limit,
                          _stream);
-
-#ifdef LOCAL_TEST
-  auto& splits = _chunk_read_data.output_table_ranges;
-  printf("------------\nSplits decoded table (/total num rows = %d): \n",
-         (int)_chunk_read_data.decoded_table->num_rows());
-  for (size_t idx = 0; idx < splits.size(); idx++) {
-    printf("{%ld, %ld}\n", splits[idx].begin, splits[idx].end);
-  }
-  fflush(stdout);
-
-  {
-    _stream.synchronize();
-    auto peak_mem = mem_stats_logger.peak_memory_usage();
-    std::cout << "decomp and decode, peak_memory_usage: " << peak_mem << "("
-              << (peak_mem * 1.0) / (1024.0 * 1024.0) << " MB)" << std::endl;
-  }
-#endif
 }
 
 }  // namespace cudf::io::orc::detail
From f19794695aa2dbbcf4e571adf2b9ee9bd8f290d8 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 15 Mar 2024 13:24:37 -0700
Subject: [PATCH 250/321] Cleanup `reader_impl_chunking.hpp`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.hpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index f77ca173d35..15cacea2e1d 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -220,16 +220,9 @@ struct chunk_read_data {
     return curr_output_table_range < output_table_ranges.size();
   }
 
-  // Only has more chunk to output if:
   bool has_next() const
   {
-#ifdef LOCAL_TEST
-    printf("compute has_next: %d, %d, %d\n",
-           (int)more_stripe_to_load(),
-           (int)more_stripe_to_decode(),
-           (int)more_table_chunk_to_output());
-#endif
-
+    // Only has more chunk to output if:
     return more_stripe_to_load() || more_stripe_to_decode() || more_table_chunk_to_output();
   }
 };
From 74d806bf8dfc6d541bfc26ece00716bff7a5db3c Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 15 Mar 2024 13:28:25 -0700
Subject: [PATCH 251/321] Change row selection test

Signed-off-by: Nghia Truong
---
 cpp/tests/io/orc_chunked_reader_test.cu | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu
index 78f0894134c..2be1513e5f2 100644
--- a/cpp/tests/io/orc_chunked_reader_test.cu
+++ b/cpp/tests/io/orc_chunked_reader_test.cu
@@ -1277,8 +1277,12 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList)
 
 TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection)
 {
-  int64_t constexpr num_rows =
100'000'000l; + // `num_rows` should not be divisible by `stripe_size_rows`, to test the correctness of row + // selections. + int64_t constexpr num_rows = 100'517'687l; int constexpr rows_per_stripe = 100'000; + static_assert(num_rows % rows_per_stripe != 0, + "`num_rows` should not be divisible by `stripe_size_rows`."); auto const it = thrust::make_counting_iterator(0); auto const col = int32s_col(it, it + num_rows); @@ -1294,7 +1298,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) // Verify metadata. auto const metadata = cudf::io::read_orc_metadata(cudf::io::source_info{filepath}); EXPECT_EQ(metadata.num_rows(), num_rows); - EXPECT_EQ(metadata.num_stripes(), num_rows / rows_per_stripe); + EXPECT_EQ(metadata.num_stripes(), num_rows / rows_per_stripe + 1); int constexpr random_val = 123456; From 2a67770bf6828e55e2c38b8fa1e2a44f87ea70d7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Mar 2024 13:31:19 -0700 Subject: [PATCH 252/321] Cleanup test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 2be1513e5f2..173e9ce00d4 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1470,7 +1470,5 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, test_chunk->view()); } - printf("done local test\n"); - fflush(stdout); #endif // LOCAL_TEST } From f76f61e21280c4251c30b6f8c9fe159a145f0f9e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Mar 2024 15:06:33 -0700 Subject: [PATCH 253/321] Construct timezone table in global step Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 16 ++++++++++++++++ cpp/src/io/orc/reader_impl_chunking.hpp | 3 +++ cpp/src/io/orc/reader_impl_decode.cu | 20 ++------------------ 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index e5d17204fb9..06800224865 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -19,6 +19,7 @@ #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" +#include #include #include @@ -29,6 +30,7 @@ #include #include +#include #include namespace cudf::io::orc::detail { @@ -251,6 +253,20 @@ void reader::impl::global_preprocess(read_mode mode) auto const num_total_stripes = selected_stripes.size(); auto const num_levels = _selected_columns.num_levels(); + // Set up table for converting timestamp columns from local to UTC time + _file_itm_data.tz_table = [&] { + auto const has_timestamp_column = std::any_of( + _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { + return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { + return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; + }); + }); + + return has_timestamp_column ? 
cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_footer->writerTimezone, _stream) + : std::make_unique(); + }(); + // // Pre allocate necessary memory for data processed in the other reading steps: // diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 15cacea2e1d..5f958d6d73f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -168,6 +168,9 @@ struct file_intermediate_data { // List of nested type columns at each nested level. std::vector> lvl_nested_cols; + // Table for converting timestamp columns from local to UTC time. + std::unique_ptr tz_table; + bool global_preprocessed{false}; }; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2aab392cd6b..d3fc94ed760 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -22,7 +22,6 @@ #include "io/utilities/config_utils.hpp" #include -#include #include #include #include @@ -736,23 +735,8 @@ void reader::impl::decompress_and_decode() "Number or rows to decode exceeds the column size limit.", std::overflow_error); - // TODO: move this to global process - // Set up table for converting timestamp columns from local to UTC time - auto const tz_table = [&, &writerTimezone = selected_stripes[0].stripe_footer->writerTimezone] { - auto const has_timestamp_column = std::any_of( - _selected_columns.levels.cbegin(), _selected_columns.levels.cend(), [&](auto const& col_lvl) { - return std::any_of(col_lvl.cbegin(), col_lvl.cend(), [&](auto const& col_meta) { - return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; - }); - }); - - return has_timestamp_column - ? cudf::detail::make_timezone_transition_table({}, writerTimezone, _stream) - : std::make_unique(); - }(); - auto const tz_table_dptr = table_device_view::create(tz_table->view(), _stream); - - auto const num_levels = _selected_columns.num_levels(); + auto const tz_table_dptr = table_device_view::create(_file_itm_data.tz_table->view(), _stream); + auto const num_levels = _selected_columns.num_levels(); _out_buffers.resize(num_levels); // Column descriptors ('chunks'). From de72389cd222dcf7b1821630720f9dc8345d6f02 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Mar 2024 15:21:05 -0700 Subject: [PATCH 254/321] Use `rmm::exec_policy_nosync` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 6 ++---- cpp/src/io/orc/reader_impl_decode.cu | 17 +++++++---------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 06800224865..7b89d63ec11 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -427,10 +427,9 @@ void reader::impl::global_preprocess(read_mode mode) return; } - // TODO: exec_policy_nosync // Compute the prefix sum of stripes' data sizes. total_stripe_sizes.host_to_device_async(_stream); - thrust::inclusive_scan(rmm::exec_policy(_stream), // todo no sync + thrust::inclusive_scan(rmm::exec_policy_nosync(_stream), total_stripe_sizes.d_begin(), total_stripe_sizes.d_end(), total_stripe_sizes.d_begin(), @@ -622,10 +621,9 @@ void reader::impl::load_data() return; } - // TODO: exec_policy_nosync // Compute the prefix sum of stripe data sizes and rows. 
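  // For illustration, assuming four loaded stripes with decompressed sizes of {5, 3, 8, 2} MB
  // and a decode limit of 10 MB: the inclusive scan yields {5, 8, 16, 18} MB, and `find_splits`
  // then cuts the range into {[0, 2), [2, 4)} so that each decode range stays near the limit.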
stripe_decomp_sizes.host_to_device_async(_stream); - thrust::inclusive_scan(rmm::exec_policy(_stream), + thrust::inclusive_scan(rmm::exec_policy_nosync(_stream), stripe_decomp_sizes.d_begin(), stripe_decomp_sizes.d_end(), stripe_decomp_sizes.d_begin(), diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index d3fc94ed760..ff4528aea5f 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -129,7 +129,7 @@ rmm::device_buffer decompress_stripe_data( rmm::device_uvector> inflate_out( num_compressed_blocks + num_uncompressed_blocks, stream); rmm::device_uvector inflate_res(num_compressed_blocks, stream); - thrust::fill(rmm::exec_policy(stream), + thrust::fill(rmm::exec_policy_nosync(stream), inflate_res.begin(), inflate_res.end(), compression_result{0, compression_status::FAILURE}); @@ -233,7 +233,7 @@ rmm::device_buffer decompress_stripe_data( // Check if any block has been failed to decompress. // Not using `thrust::any` or `thrust::count_if` to defer stream sync. thrust::for_each( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(std::size_t{0}), thrust::make_counting_iterator(inflate_res.size()), [results = inflate_res.begin(), @@ -332,7 +332,7 @@ void update_null_mask(cudf::detail::hostdevice_2dvector& chunks if (child_valid_map_base != nullptr) { rmm::device_uvector dst_idx(child_mask_len, stream); // Copy indexes at which the parent has valid value. - thrust::copy_if(rmm::exec_policy(stream), + thrust::copy_if(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + parent_mask_len, dst_idx.begin(), @@ -346,7 +346,7 @@ void update_null_mask(cudf::detail::hostdevice_2dvector& chunks uint32_t* dst_idx_ptr = dst_idx.data(); // Copy child valid bits from child column to valid indexes, this will merge both child // and parent null masks - thrust::for_each(rmm::exec_policy(stream), + thrust::for_each(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + dst_idx.size(), [child_valid_map_base, dst_idx_ptr, merged_mask] __device__(auto idx) { @@ -481,8 +481,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); - // TODO: exec_policy_nosync - thrust::for_each(rmm::exec_policy(stream), + thrust::for_each(rmm::exec_policy_nosync(stream), d_prefix_sums_to_update.begin(), d_prefix_sums_to_update.end(), [chunks = cudf::detail::device_2dspan{chunks}] __device__( @@ -661,9 +660,8 @@ std::vector find_table_splits(table_view const& input, auto segmented_sizes = cudf::detail::hostdevice_vector(d_segmented_sizes->size(), stream); - // TODO: exec_policy_nosync thrust::transform( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_segmented_sizes->size()), segmented_sizes.d_begin(), @@ -679,8 +677,7 @@ std::vector find_table_splits(table_view const& input, static_cast(size)}; }); - // TODO: exec_policy_nosync - thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), segmented_sizes.d_begin(), segmented_sizes.d_end(), segmented_sizes.d_begin(), From 28f7cfc56a8068385259ad1084adf0a32f96f000 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Mar 2024 11:14:19 -0700 Subject: [PATCH 255/321] Optimize 
benchmark code Signed-off-by: Nghia Truong --- cpp/benchmarks/io/orc/orc_reader_input.cpp | 78 +++++++++------------- 1 file changed, 31 insertions(+), 47 deletions(-) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 8514af28c63..e710219852e 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -31,39 +31,6 @@ namespace { constexpr int64_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; -template -void read_once(cudf::io::orc_reader_options const& options, - cudf::size_type num_rows_to_read, - Timer& timer) -{ - timer.start(); - auto const result = cudf::io::read_orc(options); - timer.stop(); - - CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns"); - CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); -} - -template -void chunked_read(cudf::io::orc_reader_options const& options, - cudf::size_type num_rows_to_read, - std::size_t output_limit, - std::size_t read_limit, - Timer& timer) -{ - auto reader = cudf::io::chunked_orc_reader(output_limit, read_limit, options); - cudf::size_type num_rows{0}; - - timer.start(); - do { - auto chunk = reader.read_chunk(); - num_rows += chunk.tbl->num_rows(); - } while (reader.has_next()); - timer.stop(); - - CUDF_EXPECTS(num_rows == num_rows_to_read, "Unexpected number of rows"); -} - template void orc_read_common(cudf::size_type num_rows_to_read, cuio_source_sink_pair& source_sink, @@ -74,18 +41,39 @@ void orc_read_common(cudf::size_type num_rows_to_read, auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.exec( - nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { - try_drop_l3_cache(); - if constexpr (!is_chunked_read) { - read_once(read_opts, num_rows_to_read, timer); - } else { + if constexpr (is_chunked_read) { + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { + try_drop_l3_cache(); auto const output_limit = static_cast(state.get_int64("output_limit")); auto const read_limit = static_cast(state.get_int64("read_limit")); - chunked_read(read_opts, num_rows_to_read, output_limit, read_limit, timer); - } - }); + + auto reader = cudf::io::chunked_orc_reader(output_limit, read_limit, read_opts); + cudf::size_type num_rows{0}; + + timer.start(); + do { + auto chunk = reader.read_chunk(); + num_rows += chunk.tbl->num_rows(); + } while (reader.has_next()); + timer.stop(); + + CUDF_EXPECTS(num_rows == num_rows_to_read, "Unexpected number of rows"); + }); + } else { // not is_chunked_read + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + auto const result = cudf::io::read_orc(read_opts); + timer.stop(); + + CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns"); + CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); + }); + } auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); @@ -150,11 +138,7 @@ void orc_read_io_compression(nvbench::state& state) return view.num_rows(); }(); - if constexpr (chunked_read) { - orc_read_common(num_rows_written, source_sink, state); - } else { - 
orc_read_common(num_rows_written, source_sink, state); - } + orc_read_common(num_rows_written, source_sink, state); } template From f527c994eec04fdc537a5d115e70966ea8e68a5e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Mar 2024 15:34:36 -0700 Subject: [PATCH 256/321] Do not sync Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index ff4528aea5f..4b9eecd884e 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -421,7 +421,7 @@ void decode_stream_data(int64_t num_dicts, // Allocate global dictionary for deserializing rmm::device_uvector global_dict(num_dicts, stream); - chunks.host_to_device_sync(stream); + chunks.host_to_device_async(stream); gpu::DecodeNullsAndStringDictionaries( chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream); From 96f89a1589c6444e3f7dd8a2082f2240fa7e0a26 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Mar 2024 16:33:35 -0700 Subject: [PATCH 257/321] Simplify code Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 4b9eecd884e..2a171a27852 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -791,8 +791,11 @@ void reader::impl::decompress_and_decode() auto const stripe_info = stripe.stripe_info; auto const stripe_footer = stripe.stripe_footer; + // Normalize stripe_idx to 0-based. + auto const stripe_local_idx = stripe_idx - stripe_start; + // The first parameter (`stripe_order`) must be normalized to 0-based. - auto const total_data_size = gather_stream_info_and_column_desc(stripe_idx - stripe_start, + auto const total_data_size = gather_stream_info_and_column_desc(stripe_local_idx, level, stripe_info, stripe_footer, @@ -821,18 +824,16 @@ void reader::impl::decompress_and_decode() // Update chunks to reference streams pointers. for (std::size_t col_idx = 0; col_idx < num_level_columns; col_idx++) { - auto& chunk = chunks[stripe_idx - stripe_start][col_idx]; + auto& chunk = chunks[stripe_local_idx][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting chunk.start_row = - (level == 0) - ? stripe_start_row - : col_meta.child_start_row[(stripe_idx - stripe_start) * num_level_columns + col_idx]; + (level == 0) ? stripe_start_row + : col_meta.child_start_row[stripe_local_idx * num_level_columns + col_idx]; chunk.num_rows = (level == 0) ? num_rows_in_stripe - : col_meta.num_child_rows_per_stripe[(stripe_idx - stripe_start) * num_level_columns + - col_idx]; + : col_meta.num_child_rows_per_stripe[stripe_local_idx * num_level_columns + col_idx]; chunk.column_num_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[col_idx]; chunk.parent_validity_info = (level == 0) ? 
column_validity_info{} : col_meta.parent_column_data[col_idx]; From 20d8e81eb2c2a0b3a5c4d7bad745ed92a86d1797 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Mar 2024 10:49:54 -0700 Subject: [PATCH 258/321] Add assertion to `num_rows` in parquet reader Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_helpers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 7b890111dab..604b9b77a8a 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -631,6 +631,8 @@ aggregate_reader_metadata::select_row_groups( if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( skip_rows_opt, num_rows_opt, get_num_rows()); + CUDF_EXPECTS(from_opts.second <= static_cast(std::numeric_limits::max()), + "Number of reading rows exceeds cudf's column size limit."); return std::pair{static_cast(from_opts.first), static_cast(from_opts.second)}; }(); From 99afb2edc59bd5cf99536ca3d7a3af658c4437df Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Mar 2024 10:52:01 -0700 Subject: [PATCH 259/321] Fix comment Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 173e9ce00d4..ba57145465e 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1431,7 +1431,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) .build(); auto reader = cudf::io::chunked_orc_reader( static_cast(rows_per_stripe * 5.7) * - sizeof(data_type) /* output limit, equal to 5.2M rows */, + sizeof(data_type) /* output limit, equal to 5.7M rows */, 0UL /* no input limit */, rows_per_stripe / 2 /* output granularity, or minimum number of rows for the output chunk */, read_opts); From 38d87487bc44a646a42abd63bbc23102b92c61b7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Mar 2024 10:52:09 -0700 Subject: [PATCH 260/321] Add assertion to `skip_rows` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 259c5c1016a..a28011feb8f 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -199,6 +199,7 @@ class orc_reader_options { */ void set_skip_rows(int64_t rows) { + CUDF_EXPECTS(rows >= 0, "skip_rows cannot be negative"); CUDF_EXPECTS(rows == 0 or _stripes.empty(), "Can't set both skip_rows along with stripes"); _skip_rows = rows; } From 6e658dc0b37b8b38fd8520a1acd79da126310325 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Mar 2024 11:02:05 -0700 Subject: [PATCH 261/321] Update docs Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 65d1f0a7ad4..94f681fff0c 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -112,6 +112,16 @@ class aggregate_orc_metadata { * @brief Selects the stripes to read, based on the row/stripe selection parameters. * * Stripes are potentially selected from multiple files. 
+ *
+ * Upon parsing stripes' information, the numbers of rows to skip and rows to read are also
+ * updated to match the actual numbers for the stripes read from the data sources.
+ *
+ * @param user_specified_stripes The specified stripe indices to read
+ * @param skip_rows Number of rows to skip from reading
+ * @param num_rows Number of rows to read
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return A tuple of the corrected skip_rows and num_rows values along with a vector of
+ * stripes' metadata such as footer, data information, and source index
   */
  [[nodiscard]] std::tuple> select_stripes(
    std::vector<std::vector<size_type>> const& user_specified_stripes,
From 734dcf363ee2c7a5fcf91e311913f6b44933440d Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 28 Mar 2024 13:34:54 -0700
Subject: [PATCH 262/321] Separate `impl` class from `reader`, resulting in
 `reader_impl`, and also make `chunked_reader` independent from `reader`

Signed-off-by: Nghia Truong
---
 cpp/include/cudf/io/detail/orc.hpp     | 18 +++-----
 cpp/src/io/orc/reader.cu               | 28 ++++++------
 cpp/src/io/orc/reader_impl.cu          | 62 +++++++++++++-------------
 cpp/src/io/orc/reader_impl.hpp         | 36 +++++++--------
 cpp/src/io/orc/reader_impl_chunking.cu |  4 +-
 cpp/src/io/orc/reader_impl_decode.cu   |  2 +-
 6 files changed, 73 insertions(+), 77 deletions(-)

diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp
index c07dbef11d7..32b28692140 100644
--- a/cpp/include/cudf/io/detail/orc.hpp
+++ b/cpp/include/cudf/io/detail/orc.hpp
@@ -37,18 +37,15 @@ class chunked_orc_writer_options;
 
 namespace orc::detail {
 
+// Forward declaration of the internal reader class
+class reader_impl;
+
 /**
 * @brief Class to read ORC dataset data into columns.
 */
 class reader {
  protected:
-  class impl;
-  std::unique_ptr<impl> _impl;
-
-  /**
-   * @brief Default constructor, needed for subclassing.
-   */
-  reader();
+  std::unique_ptr<reader_impl> _impl;
 
 public:
  /**
@@ -79,11 +76,10 @@ class reader {
 
 /**
 * @brief The reader class that supports iterative reading from an array of data sources.
- *
- * This class intentionally subclasses the `reader` class with private inheritance to hide the
- * base class `reader::read()` API. As such, only chunked reading APIs are supported through it.
 */
-class chunked_reader : private reader {
+class chunked_reader {
+  std::unique_ptr<reader_impl> _impl;
+
 public:
  /**
   * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type,
diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu
index ea0b43c0f93..bcc1c5b8649 100644
--- a/cpp/src/io/orc/reader.cu
+++ b/cpp/src/io/orc/reader.cu
@@ -19,15 +19,14 @@
 
 namespace cudf::io::orc::detail {
 
-// Constructor and destructor are defined within this translation unit.
-reader::reader() = default;
+// Destructors are defined within this translation unit.
reader::~reader() = default; reader::reader(std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _impl{std::make_unique(std::move(sources), options, stream, mr)} + : _impl{std::make_unique(std::move(sources), options, stream, mr)} { } @@ -39,10 +38,9 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : reader() + : _impl{std::make_unique( + output_size_limit, data_read_limit, std::move(sources), options, stream, mr)} { - _impl = std::make_unique( - output_size_limit, data_read_limit, std::move(sources), options, stream, mr); } chunked_reader::chunked_reader(std::size_t output_size_limit, @@ -52,17 +50,19 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : reader() + : _impl{std::make_unique(output_size_limit, + data_read_limit, + output_row_granularity, + std::move(sources), + options, + stream, + mr)} { + // Although we internally accept non-positve value for `output_row_granularity` because we + // implicitly change such value into `DEFAULT_OUTPUT_ROW_GRANULARITY`. + // The user are not allowed to do so but instead required to specify an explicit positive number. CUDF_EXPECTS(output_row_granularity > 0, "The value of `output_row_granularity` must be positive."); - _impl = std::make_unique(output_size_limit, - data_read_limit, - output_row_granularity, - std::move(sources), - options, - stream, - mr); } chunked_reader::~chunked_reader() = default; diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 140e4517862..d2c881218cc 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -24,7 +24,7 @@ namespace cudf::io::orc::detail { -void reader::impl::prepare_data(read_mode mode) +void reader_impl::prepare_data(read_mode mode) { // There are no columns in the table. if (_selected_columns.num_levels() == 0) { return; } @@ -49,7 +49,7 @@ void reader::impl::prepare_data(read_mode mode) } } -table_with_metadata reader::impl::make_output_chunk() +table_with_metadata reader_impl::make_output_chunk() { // There is no columns in the table. if (_selected_columns.num_levels() == 0) { return {std::make_unique
(), table_metadata{}}; } @@ -104,7 +104,7 @@ table_with_metadata reader::impl::make_output_chunk() return {make_output_table(), table_metadata{_out_metadata} /*copy cached metadata*/}; } -table_metadata reader::impl::get_meta_with_user_data() +table_metadata reader_impl::get_meta_with_user_data() { if (_meta_with_user_data) { return table_metadata{*_meta_with_user_data}; } @@ -133,37 +133,37 @@ table_metadata reader::impl::get_meta_with_user_data() return out_metadata; } -reader::impl::impl(std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : reader::impl::impl(0UL, 0UL, std::move(sources), options, stream, mr) +reader_impl::reader_impl(std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader_impl::reader_impl(0UL, 0UL, std::move(sources), options, stream, mr) { } -reader::impl::impl(std::size_t output_size_limit, - std::size_t data_read_limit, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : reader::impl::impl(output_size_limit, - data_read_limit, - DEFAULT_OUTPUT_ROW_GRANULARITY, - std::move(sources), - options, - stream, - mr) +reader_impl::reader_impl(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader_impl::reader_impl(output_size_limit, + data_read_limit, + DEFAULT_OUTPUT_ROW_GRANULARITY, + std::move(sources), + options, + stream, + mr) { } -reader::impl::impl(std::size_t output_size_limit, - std::size_t data_read_limit, - size_type output_row_granularity, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +reader_impl::reader_impl(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) : _stream(stream), _mr(mr), _config{options.get_timestamp_type(), @@ -188,19 +188,19 @@ reader::impl::impl(std::size_t output_size_limit, "skip_rows is not supported by nested column"); } -table_with_metadata reader::impl::read() +table_with_metadata reader_impl::read() { prepare_data(read_mode::READ_ALL); return make_output_chunk(); } -bool reader::impl::has_next() +bool reader_impl::has_next() { prepare_data(read_mode::CHUNKED_READ); return _chunk_read_data.has_next(); } -table_with_metadata reader::impl::read_chunk() +table_with_metadata reader_impl::read_chunk() { prepare_data(read_mode::CHUNKED_READ); return make_output_chunk(); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 45d60acb3db..07beecb70d0 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -38,7 +38,7 @@ struct reader_column_meta; /** * @brief Implementation for ORC reader. */ -class reader::impl { +class reader_impl { public: /** * @brief Constructor from a dataset source with reader options. 
@@ -51,33 +51,33 @@ class reader::impl { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit impl(std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + explicit reader_impl(std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) */ - explicit impl(std::size_t output_size_limit, - std::size_t data_read_limit, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + explicit reader_impl(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, * size_type, orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) */ - explicit impl(std::size_t output_size_limit, - std::size_t data_read_limit, - size_type output_row_granularity, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + explicit reader_impl(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::io::orc::detail::reader::read diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 7b89d63ec11..777a5a6f79f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -229,7 +229,7 @@ range get_range(std::vector const& input_ranges, range const& selected_ra return {first_range.begin, last_range.end}; } -void reader::impl::global_preprocess(read_mode mode) +void reader_impl::global_preprocess(read_mode mode) { if (_file_itm_data.global_preprocessed) { return; } _file_itm_data.global_preprocessed = true; @@ -447,7 +447,7 @@ void reader::impl::global_preprocess(read_mode mode) find_splits(total_stripe_sizes, num_total_stripes, load_limit); } -void reader::impl::load_data() +void reader_impl::load_data() { if (_file_itm_data.has_no_data()) { return; } diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2a171a27852..04808d6f0b0 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -690,7 +690,7 @@ std::vector find_table_splits(table_view const& input, } // namespace -void reader::impl::decompress_and_decode() +void reader_impl::decompress_and_decode() { if (_file_itm_data.has_no_data()) { return; } From 75c5b7c9fe0bc50d88099625e489c651b606417c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Mar 2024 14:00:31 -0700 Subject: [PATCH 263/321] Fix spell Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index bcc1c5b8649..37eb7ab0fd7 100644 --- a/cpp/src/io/orc/reader.cu +++ 
b/cpp/src/io/orc/reader.cu
@@ -58,7 +58,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit,
                               stream,
                               mr)}
{
-  // Although we internally accept non-positve value for `output_row_granularity` because we
+  // Although we internally accept non-positive value for `output_row_granularity` because we
  // implicitly change such value into `DEFAULT_OUTPUT_ROW_GRANULARITY`.
  // The user is not allowed to do so but instead is required to specify an explicit positive number.
  CUDF_EXPECTS(output_row_granularity > 0,

From 861cdccd7fe5ba7b96884c7f095f1c1ead90025b Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 28 Mar 2024 15:22:32 -0700
Subject: [PATCH 264/321] Only update `total_stripe_sizes` if in chunked read mode

Signed-off-by: Nghia Truong

---
 cpp/src/io/orc/reader_impl_chunking.cu | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 777a5a6f79f..ec766465817 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -351,7 +351,8 @@ void reader_impl::global_preprocess(read_mode mode)
  //
  // Accumulate data size for data streams in each stripe.
-  cudf::detail::hostdevice_vector<cumulative_size> total_stripe_sizes(num_total_stripes, _stream);
+  cudf::detail::hostdevice_vector<cumulative_size> total_stripe_sizes(
+    mode == read_mode::CHUNKED_READ ? num_total_stripes : std::size_t{0}, _stream);

  for (std::size_t stripe_global_idx = 0; stripe_global_idx < num_total_stripes;
       ++stripe_global_idx) {
@@ -407,7 +408,9 @@ void reader_impl::global_preprocess(read_mode mode)
      }
    }  // end loop level

-    total_stripe_sizes[stripe_global_idx] = {1, this_stripe_size};
+    if (mode == read_mode::CHUNKED_READ) {
+      total_stripe_sizes[stripe_global_idx] = {1, this_stripe_size};
+    }

    // Range of all stream reads in `read_info` corresponding to this stripe, in all levels.
    stripe_data_read_ranges[stripe_global_idx] = range{last_read_size, read_info.size()};
@@ -422,7 +425,8 @@ void reader_impl::global_preprocess(read_mode mode)
  _chunk_read_data.curr_load_stripe_range = 0;

  // Load all stripes if there is no read limit.
-  if (_chunk_read_data.data_read_limit == 0) {
+  // In addition, if we are not in chunked read mode, we also load all stripe.
+  if (mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) {
    _chunk_read_data.load_stripe_ranges = {range{0UL, num_total_stripes}};
    return;
  }

From ee5070147538f6a01dee3c3383831e5c818acbc9 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 28 Mar 2024 16:08:35 -0700
Subject: [PATCH 265/321] Implement optimized code path splitting stripe range in special cases

Signed-off-by: Nghia Truong

---
 cpp/src/io/orc/reader_impl.cu          |   2 +-
 cpp/src/io/orc/reader_impl.hpp         |   4 +-
 cpp/src/io/orc/reader_impl_chunking.cu | 125 ++++++++++++++++---------
 3 files changed, 84 insertions(+), 47 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index d2c881218cc..f2c8b53fac3 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -38,7 +38,7 @@ void reader_impl::prepare_data(read_mode mode)
    // - There are more stripes to load, and
    // - All loaded stripes were decoded, and
    // - All the decoded results were output.
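// A minimal sketch of how a caller drives the chunked interface being threaded
// through in these hunks; has_next()/read_chunk() are the members declared in this
// series, while the helper itself and its names are illustrative assumptions.
#include <vector>

template <typename ChunkedReader, typename Table>
std::vector<Table> drain_chunks(ChunkedReader& reader)
{
  std::vector<Table> chunks;
  while (reader.has_next()) {                  // runs prepare_data(CHUNKED_READ)
    chunks.emplace_back(reader.read_chunk());  // one size-bounded table per call
  }
  return chunks;
}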
-  load_data();
+  load_data(mode);
  }

  if (_chunk_read_data.more_stripe_to_decode()) {
    // Only decompress/decode the loaded stripes if:
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 07beecb70d0..17ce7c23a58 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -133,8 +133,10 @@ class reader_impl {
   * smaller subsets, each of which is to be decompressed and decoded in the next step
   * `decompress_and_decode()`. This is to ensure that loading data from data sources together with
   * decompression and decoding will be capped around the given data read limit.
+   *
+   * @param mode Value indicating if the data sources are read all at once or chunk by chunk
   */
-  void load_data();
+  void load_data(read_mode mode);

  /**
   * @brief Decompress and decode stripe data in the internal buffers, and store the result into
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index ec766465817..576f25fac6e 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -425,7 +425,7 @@ void reader_impl::global_preprocess(read_mode mode)
  _chunk_read_data.curr_load_stripe_range = 0;

  // Load all stripes if there is no read limit.
-  // In addition, if we are not in chunked read mode, we also load all stripe.
+  // In addition, if we are not in CHUNKED_READ mode, we also load all stripes.
  if (mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) {
    _chunk_read_data.load_stripe_ranges = {range{0UL, num_total_stripes}};
    return;
@@ -451,7 +451,7 @@ void reader_impl::global_preprocess(read_mode mode)
  find_splits(total_stripe_sizes, num_total_stripes, load_limit);
}

-void reader_impl::load_data()
+void reader_impl::load_data(read_mode mode)
{
  if (_file_itm_data.has_no_data()) { return; }

@@ -524,6 +524,82 @@ void reader_impl::load_data(read_mode mode)
    CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read.");
  }

+  // Compute number of rows in the loading stripes.
+  auto const num_loading_rows = [&] {
+    std::size_t count{0};
+    for (std::size_t idx = 0; idx < stripe_count; ++idx) {
+      count += _file_itm_data.selected_stripes[idx + stripe_start].stripe_info->numberOfRows;
+    }
+    return count;
+  }();
+
+  // Decoding range is reset to start from the first position in `decode_stripe_ranges`.
+  _chunk_read_data.curr_decode_stripe_range = 0;
+
+  // Decode all loaded stripes if there is no read limit, or if we are not in chunked_read mode.
+  // In theory, we should just decode enough stripes for outputting one table chunk, instead of
+  // decoding all stripes like this, for better load-balancing and reduced memory usage.
+  // However, we do not know how many stripes are 'enough' because there is no simple and
+  // cheap way to compute the exact decoded sizes of stripes without actually decoding them.
+  if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) &&
+      // In addition to read limit, we also need to check if the total number of
+      // rows in the loaded stripes exceeds column size limit.
+      // If that is the case, we cannot read all stripes at once.
+      num_loading_rows < static_cast<std::size_t>(std::numeric_limits<size_type>::max())) {
+    _chunk_read_data.decode_stripe_ranges = {load_stripe_range};
+    return;
+  }
+
+  // For estimating the decompressed sizes of the loaded stripes.
+  // Only valid in CHUNKED_READ mode.
+  cudf::detail::hostdevice_vector<cumulative_size_and_row> stripe_decomp_sizes(
+    mode == read_mode::CHUNKED_READ ?
stripe_count : std::size_t{0}, _stream); + + // For mapping stripe to the number of rows in it. + // Only valid in READ_ALL mode. + // This is similar to store exactly the same data as for `stripe_decomp_size` but + // does not allocate device memory. + std::vector stripe_rows(mode == read_mode::READ_ALL ? stripe_count + : std::size_t{0}); + + // Fill up the `cumulative_size_and_row` array. + auto const stripe_sizes_rows_ptr = + mode == read_mode::CHUNKED_READ ? stripe_decomp_sizes.begin() : stripe_rows.data(); + for (std::size_t idx = 0; idx < stripe_count; ++idx) { + auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; + auto const stripe_info = stripe.stripe_info; + stripe_sizes_rows_ptr[idx] = + cumulative_size_and_row{1UL /*count*/, 0UL /*size_bytes*/, stripe_info->numberOfRows}; + } + + // This is the post-processing step after we've done with splitting `load_stripe_range` into + // `decode_stripe_ranges`. + auto const add_range_offset = [stripe_start](std::vector& new_ranges) { + // The split ranges always start from zero. + // We need to change these ranges to start from `stripe_start` which are the correct subranges + // of the current loaded stripe range. + for (auto& range : new_ranges) { + range.begin += stripe_start; + range.end += stripe_start; + } + }; + + // + // Optimized code path when we do not have any read limit but the number of rows in the + // loaded stripes exceeds column size limit. + // + if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) && + num_loading_rows >= static_cast(std::numeric_limits::max())) { + // Here we will split based on number of rows, not data size. + // Thus, we use a maximum possible value for size_limit. + _chunk_read_data.decode_stripe_ranges = find_splits( + cudf::host_span(stripe_sizes_rows_ptr, stripe_count), + stripe_count, + std::numeric_limits::max()); + add_range_offset(_chunk_read_data.decode_stripe_ranges); + return; + } + // // Split range of loaded stripes into subranges that can be decoded separately without blowing up // memory: @@ -534,20 +610,6 @@ void reader_impl::load_data() // decompression and decoding. stream_source_map stream_compinfo_map; - // For estimating the decompressed sizes of the loaded stripes. - cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, - _stream); - - // Number of rows in the loading stripes. - std::size_t num_loading_rows{0}; - - for (std::size_t idx = 0; idx < stripe_count; ++idx) { - auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; - auto const stripe_info = stripe.stripe_info; - stripe_decomp_sizes[idx] = cumulative_size_and_row{1, 0, stripe_info->numberOfRows}; - num_loading_rows += stripe_info->numberOfRows; - } - auto& compinfo_map = _file_itm_data.compinfo_map; for (std::size_t level = 0; level < num_levels; ++level) { @@ -608,23 +670,6 @@ void reader_impl::load_data() } } // end loop level - // Decoding range is reset to start from the first position in `decode_stripe_ranges`. - _chunk_read_data.curr_decode_stripe_range = 0; - - // Decode all loaded stripes if there is no read limit. - // In theory, we should just decode enough stripes for output one table chunk, instead of - // decoding all stripes like this, for better load-balancing and reduce memory usage. - // However, we do not know how many stripes are 'enough' because there is not any simple and - // cheap way to compute the exact decoded sizes of stripes without actually decoding them. 
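// A standalone sketch of the splitting technique that find_splits applies to the
// cumulative arrays built above: walk inclusive prefix sums and cut a new range
// whenever the pending size or row count would exceed its limit. The types and
// the greedy policy here are assumptions drawn from the surrounding code, not
// cudf's exact implementation (which binary-searches the prefix sums instead).
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

struct cum_size_row {
  std::size_t size_bytes;  // inclusive prefix-summed data size
  std::size_t rows;        // inclusive prefix-summed row count
};
struct split { std::size_t begin, end; };

std::vector<split> find_splits_sketch(std::vector<cum_size_row> const& cum,
                                      std::size_t size_limit)
{
  auto constexpr row_limit = static_cast<std::size_t>(std::numeric_limits<int32_t>::max());
  std::vector<split> out;
  if (cum.empty()) { return out; }
  std::size_t begin = 0, base_size = 0, base_rows = 0;
  for (std::size_t i = 0; i < cum.size(); ++i) {
    bool const over = cum[i].size_bytes - base_size > size_limit ||
                      cum[i].rows - base_rows > row_limit;
    if (over && i > begin) {  // always keep at least one element per range
      out.push_back({begin, i});
      begin     = i;
      base_size = cum[i - 1].size_bytes;
      base_rows = cum[i - 1].rows;
    }
  }
  out.push_back({begin, cum.size()});
  return out;
}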
- if (_chunk_read_data.data_read_limit == 0 && - // In addition to read limit, we also need to check if the the total number of - // rows in the loaded stripes exceeds column size limit. - // If that is the case, we cannot read all stripes at once. - num_loading_rows < static_cast(std::numeric_limits::max())) { - _chunk_read_data.decode_stripe_ranges = {load_stripe_range}; - return; - } - // Compute the prefix sum of stripe data sizes and rows. stripe_decomp_sizes.host_to_device_async(_stream); thrust::inclusive_scan(rmm::exec_policy_nosync(_stream), @@ -635,26 +680,16 @@ void reader_impl::load_data() stripe_decomp_sizes.device_to_host_sync(_stream); auto const decode_limit = [&] { - // In this case, we have no read limit but have to split due to having number of rows in loaded - // stripes exceeds column size limit. So we will split based on row number, not data size. - if (_chunk_read_data.data_read_limit == 0) { return std::numeric_limits::max(); } - - // If `data_read_limit` is too small, make sure not to pass 0 byte limit to `find_splits`. auto const tmp = static_cast(_chunk_read_data.data_read_limit * chunk_read_data::decode_limit_ratio); + // Make sure not to pass 0 byte limit to `find_splits`. return tmp > 0UL ? tmp : 1UL; }(); _chunk_read_data.decode_stripe_ranges = find_splits(stripe_decomp_sizes, stripe_count, decode_limit); - // The split ranges always start from zero. - // We need to change these ranges to start from `stripe_start` which are the correct subranges of - // the current loaded stripe range. - for (auto& range : _chunk_read_data.decode_stripe_ranges) { - range.begin += stripe_start; - range.end += stripe_start; - } + add_range_offset(_chunk_read_data.decode_stripe_ranges); } } // namespace cudf::io::orc::detail From b976d99d1d3e1b9806414e5ea1977ef0768b4d5e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 09:58:02 -0700 Subject: [PATCH 266/321] Remove test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index ba57145465e..39450bb2a9f 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1266,13 +1266,6 @@ TEST_F(OrcChunkedReaderInputLimitTest, MixedColumnsHavingList) input_limit{128 * 1024 * 1024UL}, expected); } - - // TODO: remove - { - int constexpr expected[] = {1, 1, 1}; - input_limit_test_read( - __LINE__, test_files, input, output_limit{0UL}, input_limit{0UL}, expected); - } } TEST_F(OrcChunkedReaderInputLimitTest, ReadWithRowSelection) From ec7303fbf1c6a2f833097e8047a4d6a285ae3736 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 10:23:20 -0700 Subject: [PATCH 267/321] Add `read_mode` param to `decompress_and_decode`, and change comments Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/reader_impl.hpp | 4 +++- cpp/src/io/orc/reader_impl_chunking.cu | 33 ++++++++++++++------------ cpp/src/io/orc/reader_impl_decode.cu | 2 +- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index f2c8b53fac3..566c8a059d8 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -44,7 +44,7 @@ void reader_impl::prepare_data(read_mode mode) // Only decompress/decode the loaded stripes if: // - There are loaded stripes that were not decoded yet, and // - All the decoded results were output. 
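// The decode_limit lambda simplified in the hunk above reduces to this clamp; a
// standalone sketch (the concrete ratio is only named, never defined, in these patches):
#include <algorithm>
#include <cstddef>

std::size_t decode_limit_sketch(std::size_t data_read_limit, double decode_limit_ratio)
{
  // Reserve a fraction of the read budget for decompressed data, and never hand
  // a 0-byte limit to the splitter.
  auto const scaled = static_cast<std::size_t>(data_read_limit * decode_limit_ratio);
  return std::max<std::size_t>(scaled, 1);
}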
-      decompress_and_decode();
+      decompress_and_decode(mode);
    }
  }
}
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 17ce7c23a58..e8c3adfe1f9 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -144,8 +144,10 @@ class reader_impl {
   *
   * This function expects that the other preprocessing steps (`global_preprocess()` and
   * `load_data()`) have already been done.
+   *
+   * @param mode Value indicating if the data sources are read all at once or chunk by chunk
   */
-  void decompress_and_decode();
+  void decompress_and_decode(read_mode mode);

  /**
   * @brief Create the output table from the intermediate table and return it along with metadata.
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 576f25fac6e..15ba652c4a4 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -351,6 +351,8 @@ void reader_impl::global_preprocess(read_mode mode)
  //
  // Accumulate data size for data streams in each stripe.
+  // This will be used for CHUNKED_READ mode only.
+  // If we are in READ_ALL mode, we do not need this since we just load all stripes.
  cudf::detail::hostdevice_vector<cumulative_size> total_stripe_sizes(
    mode == read_mode::CHUNKED_READ ? num_total_stripes : std::size_t{0}, _stream);

@@ -424,8 +426,7 @@ void reader_impl::global_preprocess(read_mode mode)
  // Load range is reset to start from the first position in `load_stripe_ranges`.
  _chunk_read_data.curr_load_stripe_range = 0;

-  // Load all stripes if there is no read limit.
-  // In addition, if we are not in CHUNKED_READ mode, we also load all stripes.
+  // Load all stripes if there is no read limit or if we are in READ_ALL mode.
  if (mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) {
    _chunk_read_data.load_stripe_ranges = {range{0UL, num_total_stripes}};
    return;
@@ -536,33 +537,33 @@ void reader_impl::load_data(read_mode mode)
  // Decoding range is reset to start from the first position in `decode_stripe_ranges`.
  _chunk_read_data.curr_decode_stripe_range = 0;

-  // Decode all loaded stripes if there is no read limit, or if we are not in chunked_read mode.
-  // In theory, we should just decode enough stripes for outputting one table chunk, instead of
+  // Decode all loaded stripes if there is no read limit, or if we are in READ_ALL mode.
+  // In theory, we should just decode 'enough' stripes for outputting one table chunk, instead of
  // decoding all stripes like this, for better load-balancing and reduced memory usage.
-  // However, we do not know how many stripes are 'enough' because there is no simple and
-  // cheap way to compute the exact decoded sizes of stripes without actually decoding them.
+  // However, we do not have any good way to know how many stripes are 'enough'.
  if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) &&
      // In addition to read limit, we also need to check if the total number of
      // rows in the loaded stripes exceeds column size limit.
-      // If that is the case, we cannot read all stripes at once.
+      // If that is the case, we cannot decode all stripes at once.
      num_loading_rows < static_cast<std::size_t>(std::numeric_limits<size_type>::max())) {
    _chunk_read_data.decode_stripe_ranges = {load_stripe_range};
    return;
  }

  // For estimating the decompressed sizes of the loaded stripes.
-  // Only valid in CHUNKED_READ mode.
+  // Only used in CHUNKED_READ mode.
  cudf::detail::hostdevice_vector<cumulative_size_and_row> stripe_decomp_sizes(
    mode == read_mode::CHUNKED_READ ?
stripe_count : std::size_t{0}, _stream);

  // For mapping stripe to the number of rows in it.
-  // Only valid in READ_ALL mode.
+  // Only used in READ_ALL mode.
-  // This is similar to store exactly the same data as for `stripe_decomp_size` but
-  // does not allocate device memory.
+  // This is to store exactly the same data as for `stripe_decomp_size` above but here we do not
+  // need to allocate device memory.
  std::vector<cumulative_size_and_row> stripe_rows(mode == read_mode::READ_ALL ? stripe_count
                                                                               : std::size_t{0});

  // Fill up the `cumulative_size_and_row` array.
+  // Note: despite its name, `hostdevice_vector::begin()` mirrors `std::vector::data()` and
+  // returns a raw pointer.
  auto const stripe_sizes_rows_ptr =
    mode == read_mode::CHUNKED_READ ? stripe_decomp_sizes.begin() : stripe_rows.data();
@@ -584,14 +585,16 @@ void reader_impl::load_data(read_mode mode)
    }
  };

-  //
  // Optimized code path when we do not have any read limit but the number of rows in the
  // loaded stripes exceeds column size limit.
-  //
+  // Note that the values `max_uncompressed_size` for each stripe are not computed here.
+  // Instead, they will be computed on the fly during decoding to avoid the overhead of
+  // storing and retrieving from memory.
  if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) &&
      num_loading_rows >= static_cast<std::size_t>(std::numeric_limits<size_type>::max())) {
-    // Here we will split based on number of rows, not data size.
-    // Thus, we use a maximum possible value for size_limit.
+    // Here we will split stripe ranges based on stripes' number of rows, not their data size.
+    // Thus, we use a maximum possible value for data size limit.
+    // The function `find_splits` will automatically handle the row count limit.
    _chunk_read_data.decode_stripe_ranges = find_splits(
      cudf::host_span<cumulative_size_and_row const>(stripe_sizes_rows_ptr, stripe_count),
      stripe_count,
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 04808d6f0b0..90faedd0063 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -690,7 +690,7 @@ std::vector<range> find_table_splits(table_view const& input,

}  // namespace

-void reader_impl::decompress_and_decode()
+void reader_impl::decompress_and_decode(read_mode mode)
{
  if (_file_itm_data.has_no_data()) { return; }

From acd9689616df414dda50e5e6fea9708447e179ab Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Fri, 29 Mar 2024 10:48:18 -0700
Subject: [PATCH 268/321] Compute compinfo on the fly

Signed-off-by: Nghia Truong

---
 cpp/src/io/orc/reader_impl_decode.cu | 39 +++++++++++++++++++++------- 
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 90faedd0063..a33c31168a8 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -58,7 +58,7 @@ namespace {
 *
 * @param loaded_stripe_range Range of stripes that are already loaded in memory
 * @param stream_range Range of streams to be decoded
- * @param num_decoded_stripes Number of stripes that the decoding streams belong to
+ * @param num_decode_stripes Number of stripes that the decoding streams belong to
 * @param compinfo_map A map to lookup compression info of streams
 * @param decompressor Block decompressor
 * @param stripe_data List of source stripe column data
@@ -84,6 +84,9 @@ rmm::device_buffer decompress_stripe_data(
  bool use_base_stride,
  rmm::cuda_stream_view stream)
{
+  // Whether we have the compression info precomputed.
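// For reference, the shape of the cache consulted when that info is precomputed;
// a sketch with a simplified key type standing in for the real stream_source_info.
#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct comp_info_sketch {
  std::size_t num_compressed_blocks;
  std::size_t num_uncompressed_blocks;
  std::size_t total_decomp_size;
};
using stream_key = std::uint64_t;  // stand-in for the (stripe, level, column, kind) key

// Filled once while loading (from parsed stream headers), then looked up during
// decoding so the same headers are not parsed a second time.
using comp_info_map_sketch = std::unordered_map<stream_key, comp_info_sketch>;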
+ auto const compinfo_ready = compinfo_map.size() > 0; + // Count the exact number of compressed blocks std::size_t num_compressed_blocks = 0; std::size_t num_uncompressed_blocks = 0; @@ -101,15 +104,33 @@ rmm::device_buffer decompress_stripe_data( info.dst_pos, info.length)); - auto const& cached_comp_info = compinfo_map.at(info.source); - auto& stream_comp_info = compinfo.back(); - stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; - stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; - stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + if (compinfo_ready) { + auto const& cached_comp_info = compinfo_map.at(info.source); + auto& stream_comp_info = compinfo.back(); + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; + stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; + stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + + num_compressed_blocks += cached_comp_info.num_compressed_blocks; + num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; + total_decomp_size += cached_comp_info.total_decomp_size; + } + } - num_compressed_blocks += cached_comp_info.num_compressed_blocks; - num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; - total_decomp_size += cached_comp_info.total_decomp_size; + if (!compinfo_ready) { + compinfo.host_to_device_async(stream); + gpu::ParseCompressedStripeData(compinfo.device_ptr(), + compinfo.size(), + decompressor.GetBlockSize(), + decompressor.GetLog2MaxCompressionRatio(), + stream); + compinfo.device_to_host_sync(stream); + + for (std::size_t i = 0; i < compinfo.size(); ++i) { + num_compressed_blocks += compinfo[i].num_compressed_blocks; + num_uncompressed_blocks += compinfo[i].num_uncompressed_blocks; + total_decomp_size += compinfo[i].max_uncompressed_size; + } } CUDF_EXPECTS( From b9f07a253d3f6381a0585702773902019f396e66 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 12:50:14 -0700 Subject: [PATCH 269/321] Separate `compinfo_map` into levels Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 18 +++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 10 ++++------ cpp/src/io/orc/reader_impl_decode.cu | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 15ba652c4a4..6e92aa0511a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -121,11 +121,10 @@ std::size_t gather_stream_info_and_column_desc( (*local_stream_order)++; } else { // not chunks.has_value() - stream_info.value()->emplace_back( - stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_source_info{stripe_order, level, column_id, stream.kind}); + stream_info.value()->emplace_back(stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_source_info{stripe_order, column_id, stream.kind}); } dst_offset += stream.length; @@ -275,6 +274,7 @@ void reader_impl::global_preprocess(read_mode mode) auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; + auto& lvl_compinfo_map = _file_itm_data.lvl_compinfo_map; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; auto& lvl_column_types = _file_itm_data.lvl_column_types; @@ -282,6 +282,7 @@ 
void reader_impl::global_preprocess(read_mode mode) lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); + lvl_compinfo_map.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); lvl_column_types.resize(num_levels); @@ -613,8 +614,6 @@ void reader_impl::load_data(read_mode mode) // decompression and decoding. stream_source_map stream_compinfo_map; - auto& compinfo_map = _file_itm_data.compinfo_map; - for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -641,8 +640,7 @@ void reader_impl::load_data(read_mode mode) compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); stream_compinfo_map[stream_source_info{ - info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = - &compinfo.back(); + info.source.stripe_idx, info.source.orc_col_idx, info.source.kind}] = &compinfo.back(); } compinfo.host_to_device_async(_stream); @@ -653,6 +651,8 @@ void reader_impl::load_data(read_mode mode) _stream); compinfo.device_to_host_sync(_stream); + auto& compinfo_map = _file_itm_data.lvl_compinfo_map[level]; + compinfo_map.clear(); // clear cache of the last load for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { // Cache these parsed numbers so they can be reused in the decompression/decoding step. compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 5f958d6d73f..3b193f13441 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -33,7 +33,6 @@ namespace cudf::io::orc::detail { */ struct stream_source_info { std::size_t stripe_idx; // global stripe id throughout all data sources - std::size_t level; // level of the nested column uint32_t orc_col_idx; // orc column id StreamKind kind; // stream kind @@ -41,16 +40,15 @@ struct stream_source_info { std::size_t operator()(stream_source_info const& id) const { auto const hasher = std::hash{}; - return hasher(id.stripe_idx) ^ hasher(id.level) ^ - hasher(static_cast(id.orc_col_idx)) ^ + return hasher(id.stripe_idx) ^ hasher(static_cast(id.orc_col_idx)) ^ hasher(static_cast(id.kind)); } }; struct equal_to { bool operator()(stream_source_info const& lhs, stream_source_info const& rhs) const { - return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && - lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; + return lhs.stripe_idx == rhs.stripe_idx && lhs.orc_col_idx == rhs.orc_col_idx && + lhs.kind == rhs.kind; } }; }; @@ -144,7 +142,7 @@ struct file_intermediate_data { std::vector stripe_data_read_ranges; // Store the compression information for each data stream. - stream_source_map compinfo_map; + std::vector> lvl_compinfo_map; // Store info for each ORC stream at each nested level. 
std::vector> lvl_stream_info; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a33c31168a8..bc25f716640 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -925,7 +925,7 @@ void reader_impl::decompress_and_decode(read_mode mode) auto decomp_data = decompress_stripe_data(load_stripe_range, stream_range, stripe_count, - _file_itm_data.compinfo_map, + _file_itm_data.lvl_compinfo_map[level], *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, From ab1afdcc4541d020d9b7d3fada64216223a49d1a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 13:18:57 -0700 Subject: [PATCH 270/321] Revert "Separate `compinfo_map` into levels" This reverts commit b9f07a253d3f6381a0585702773902019f396e66. --- cpp/src/io/orc/reader_impl_chunking.cu | 18 +++++++++--------- cpp/src/io/orc/reader_impl_chunking.hpp | 10 ++++++---- cpp/src/io/orc/reader_impl_decode.cu | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 6e92aa0511a..15ba652c4a4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -121,10 +121,11 @@ std::size_t gather_stream_info_and_column_desc( (*local_stream_order)++; } else { // not chunks.has_value() - stream_info.value()->emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_source_info{stripe_order, column_id, stream.kind}); + stream_info.value()->emplace_back( + stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_source_info{stripe_order, level, column_id, stream.kind}); } dst_offset += stream.length; @@ -274,7 +275,6 @@ void reader_impl::global_preprocess(read_mode mode) auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; - auto& lvl_compinfo_map = _file_itm_data.lvl_compinfo_map; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; auto& lvl_column_types = _file_itm_data.lvl_column_types; @@ -282,7 +282,6 @@ void reader_impl::global_preprocess(read_mode mode) lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); - lvl_compinfo_map.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); lvl_column_types.resize(num_levels); @@ -614,6 +613,8 @@ void reader_impl::load_data(read_mode mode) // decompression and decoding. 
stream_source_map stream_compinfo_map; + auto& compinfo_map = _file_itm_data.compinfo_map; + for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -640,7 +641,8 @@ void reader_impl::load_data(read_mode mode) compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); stream_compinfo_map[stream_source_info{ - info.source.stripe_idx, info.source.orc_col_idx, info.source.kind}] = &compinfo.back(); + info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = + &compinfo.back(); } compinfo.host_to_device_async(_stream); @@ -651,8 +653,6 @@ void reader_impl::load_data(read_mode mode) _stream); compinfo.device_to_host_sync(_stream); - auto& compinfo_map = _file_itm_data.lvl_compinfo_map[level]; - compinfo_map.clear(); // clear cache of the last load for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { // Cache these parsed numbers so they can be reused in the decompression/decoding step. compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 3b193f13441..5f958d6d73f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -33,6 +33,7 @@ namespace cudf::io::orc::detail { */ struct stream_source_info { std::size_t stripe_idx; // global stripe id throughout all data sources + std::size_t level; // level of the nested column uint32_t orc_col_idx; // orc column id StreamKind kind; // stream kind @@ -40,15 +41,16 @@ struct stream_source_info { std::size_t operator()(stream_source_info const& id) const { auto const hasher = std::hash{}; - return hasher(id.stripe_idx) ^ hasher(static_cast(id.orc_col_idx)) ^ + return hasher(id.stripe_idx) ^ hasher(id.level) ^ + hasher(static_cast(id.orc_col_idx)) ^ hasher(static_cast(id.kind)); } }; struct equal_to { bool operator()(stream_source_info const& lhs, stream_source_info const& rhs) const { - return lhs.stripe_idx == rhs.stripe_idx && lhs.orc_col_idx == rhs.orc_col_idx && - lhs.kind == rhs.kind; + return lhs.stripe_idx == rhs.stripe_idx && lhs.level == rhs.level && + lhs.orc_col_idx == rhs.orc_col_idx && lhs.kind == rhs.kind; } }; }; @@ -142,7 +144,7 @@ struct file_intermediate_data { std::vector stripe_data_read_ranges; // Store the compression information for each data stream. - std::vector> lvl_compinfo_map; + stream_source_map compinfo_map; // Store info for each ORC stream at each nested level. 
std::vector> lvl_stream_info; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index bc25f716640..a33c31168a8 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -925,7 +925,7 @@ void reader_impl::decompress_and_decode(read_mode mode) auto decomp_data = decompress_stripe_data(load_stripe_range, stream_range, stripe_count, - _file_itm_data.lvl_compinfo_map[level], + _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, stream_info, From bf5b11133ebd9a5cac709396940f91e7c24dceab Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 13:21:19 -0700 Subject: [PATCH 271/321] Simplify code Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 15ba652c4a4..a592af3d88d 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -640,9 +640,7 @@ void reader_impl::load_data(read_mode mode) static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); - stream_compinfo_map[stream_source_info{ - info.source.stripe_idx, info.source.level, info.source.orc_col_idx, info.source.kind}] = - &compinfo.back(); + stream_compinfo_map[info.source] = &compinfo.back(); } compinfo.host_to_device_async(_stream); From 4d01ad754224fcfe673f4da35b46cd2f03e23cd5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 13:47:48 -0700 Subject: [PATCH 272/321] Remove local `stream_compinfo_map` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 29 ++++++++++---------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index a592af3d88d..1af0ad478ed 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -608,12 +608,8 @@ void reader_impl::load_data(read_mode mode) // memory: // - // A map from a stripe sources into `CompressedStreamInfo*` pointers. - // These pointers are then used to retrieve stripe/level decompressed sizes for later - // decompression and decoding. - stream_source_map stream_compinfo_map; - auto& compinfo_map = _file_itm_data.compinfo_map; + compinfo_map.clear(); // clear cache of the last load for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; @@ -630,19 +626,15 @@ void reader_impl::load_data(read_mode mode) if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - // Cannot be cached as-is, since this is for streams in the current loaded stripe range, - // while the decompression/decoding step would probably use just a subrange of it. cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); - for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; auto const dst_base = static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); - compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); - stream_compinfo_map[info.source] = &compinfo.back(); } + // Estimate the uncompressed data. 
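// What this estimation step recovers from the raw bytes: per the ORC format, every
// compression chunk is prefixed with a 3-byte little-endian header that encodes
// (chunk_length << 1) | is_uncompressed, which is enough to count blocks and to
// bound each chunk's decompressed size by the block size from the file postscript.
// A host-side sketch of that header decode (the GPU kernel below performs the same
// walk in parallel):
#include <cstddef>
#include <cstdint>

struct orc_block_header_sketch {
  std::size_t compressed_length;
  bool is_uncompressed;  // chunk stored verbatim; copied instead of decompressed
};

inline orc_block_header_sketch parse_orc_block_header(std::uint8_t const* p)
{
  auto const raw = static_cast<std::uint32_t>(p[0]) |
                   (static_cast<std::uint32_t>(p[1]) << 8) |
                   (static_cast<std::uint32_t>(p[2]) << 16);
  return {raw >> 1, (raw & 1u) != 0};
}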
compinfo.host_to_device_async(_stream); gpu::ParseCompressedStripeData(compinfo.device_ptr(), compinfo.size(), @@ -651,17 +643,18 @@ void reader_impl::load_data(read_mode mode) _stream); compinfo.device_to_host_sync(_stream); - for (auto& [stream_id, stream_compinfo] : stream_compinfo_map) { + for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { + auto const& info = stream_info[stream_idx]; + auto const stream_compinfo = compinfo[stream_idx - stream_range.begin]; + // Cache these parsed numbers so they can be reused in the decompression/decoding step. - compinfo_map[stream_id] = {stream_compinfo->num_compressed_blocks, - stream_compinfo->num_uncompressed_blocks, - stream_compinfo->max_uncompressed_size}; - stripe_decomp_sizes[stream_id.stripe_idx - stripe_start].size_bytes += - stream_compinfo->max_uncompressed_size; + compinfo_map[info.source] = {stream_compinfo.num_compressed_blocks, + stream_compinfo.num_uncompressed_blocks, + stream_compinfo.max_uncompressed_size}; + stripe_decomp_sizes[info.source.stripe_idx - stripe_start].size_bytes += + stream_compinfo.max_uncompressed_size; } - // Important: must clear this map to reuse the (empty) map for processing the next level. - stream_compinfo_map.clear(); } else { // no decompression // Set decompression sizes equal to the input sizes. for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { From a06cf49caf8412258edb2a52de039a0f65953764 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 14:00:51 -0700 Subject: [PATCH 273/321] Optimize hashing by combining `orc_col_idx` and `kind` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 5f958d6d73f..1f368e1211b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -40,10 +40,10 @@ struct stream_source_info { struct hash { std::size_t operator()(stream_source_info const& id) const { + auto const col_kind = + static_cast(id.orc_col_idx) | (static_cast(id.kind) << 32); auto const hasher = std::hash{}; - return hasher(id.stripe_idx) ^ hasher(id.level) ^ - hasher(static_cast(id.orc_col_idx)) ^ - hasher(static_cast(id.kind)); + return hasher(id.stripe_idx) ^ hasher(id.level) ^ hasher(col_kind); } }; struct equal_to { From 54ed4fdcd60b85a8085cb255256061ed77869ff8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 14:21:01 -0700 Subject: [PATCH 274/321] Optimize by using one array of compinfo for all levels Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 1af0ad478ed..57e4b4c82a4 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -18,6 +18,7 @@ #include "io/orc/reader_impl.hpp" #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" +#include "io/utilities/hostdevice_span.hpp" #include #include @@ -611,6 +612,19 @@ void reader_impl::load_data(read_mode mode) auto& compinfo_map = _file_itm_data.compinfo_map; compinfo_map.clear(); // clear cache of the last load + // Find the maximum number of streams in all levels of the loaded stripes. 
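// A fully-typed sketch of the combined-key hash introduced in the hunk above (the
// template arguments were lost to formatting; std::size_t is assumed throughout):
#include <cstddef>
#include <cstdint>
#include <functional>

inline std::size_t hash_stream_source_sketch(std::size_t stripe_idx,
                                             std::size_t level,
                                             std::uint32_t orc_col_idx,
                                             std::uint32_t kind)
{
  // Packing the 32-bit column id and the stream kind into one 64-bit word halves
  // the number of std::hash invocations. XOR-combining is cheap but commutative,
  // hence weak in general; it is acceptable here because the key fields occupy
  // distinct value ranges in practice.
  auto const col_kind =
    static_cast<std::size_t>(orc_col_idx) | (static_cast<std::size_t>(kind) << 32);
  auto const hasher = std::hash<std::size_t>{};
  return hasher(stripe_idx) ^ hasher(level) ^ hasher(col_kind);
}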
+ auto const max_num_streams = [&] { + std::size_t max_count{0}; + for (std::size_t level = 0; level < num_levels; ++level) { + auto const stream_range = + get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); + auto const num_streams = stream_range.end - stream_range.begin; + max_count = std::max(max_count, num_streams); + } + return max_count; + }(); + cudf::detail::hostdevice_vector hd_compinfo(max_num_streams, _stream); + for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); @@ -626,12 +640,14 @@ void reader_impl::load_data(read_mode mode) if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto const& decompressor = *_metadata.per_file_metadata[0].decompressor; - cudf::detail::hostdevice_vector compinfo(0, num_streams, _stream); + auto compinfo = cudf::detail::hostdevice_span( + hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; auto const dst_base = static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); - compinfo.push_back(gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length)); + compinfo[stream_idx - stream_range.begin] = + gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length); } // Estimate the uncompressed data. From 0447271eebf931e71f0f63a97df0bade939198fc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 14:42:38 -0700 Subject: [PATCH 275/321] Use only one array of compinfo for all levels Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 24 ++++++++++-------- cpp/src/io/orc/reader_impl_decode.cu | 34 +++++++++++++++++++++----- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 57e4b4c82a4..406e9558dae 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -612,18 +612,22 @@ void reader_impl::load_data(read_mode mode) auto& compinfo_map = _file_itm_data.compinfo_map; compinfo_map.clear(); // clear cache of the last load - // Find the maximum number of streams in all levels of the loaded stripes. - auto const max_num_streams = [&] { - std::size_t max_count{0}; - for (std::size_t level = 0; level < num_levels; ++level) { - auto const stream_range = - get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); - auto const num_streams = stream_range.end - stream_range.begin; - max_count = std::max(max_count, num_streams); + // For parsing decompression data. + // We create an array that is large enough to use for all levels, thus only need to allocate + // memory once. + auto hd_compinfo = [&] { + std::size_t max_num_streams{0}; + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + // Find the maximum number of streams in all levels of the loaded stripes. 
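// The immediately-invoked lambda pattern used here, isolated: run arbitrary logic
// while keeping the result const (names in this sketch are illustrative only).
#include <algorithm>
#include <cstddef>
#include <vector>

inline std::size_t max_element_sketch(std::vector<std::size_t> const& counts)
{
  auto const max_count = [&] {
    std::size_t m{0};
    for (auto const c : counts) { m = std::max(m, c); }
    return m;
  }();  // invoked on the spot, so max_count is never left uninitialized
  return max_count;
}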
+ for (std::size_t level = 0; level < num_levels; ++level) { + auto const stream_range = + get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range); + auto const num_streams = stream_range.end - stream_range.begin; + max_num_streams = std::max(max_num_streams, num_streams); + } } - return max_count; + return cudf::detail::hostdevice_vector(max_num_streams, _stream); }(); - cudf::detail::hostdevice_vector hd_compinfo(max_num_streams, _stream); for (std::size_t level = 0; level < num_levels; ++level) { auto const& stream_info = _file_itm_data.lvl_stream_info[level]; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a33c31168a8..2221ef66fa1 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -20,6 +20,7 @@ #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" #include "io/utilities/config_utils.hpp" +#include "io/utilities/hostdevice_span.hpp" #include #include @@ -74,6 +75,7 @@ rmm::device_buffer decompress_stripe_data( range const& loaded_stripe_range, range const& stream_range, std::size_t num_decode_stripes, + cudf::detail::hostdevice_span compinfo, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, host_span stripe_data, @@ -92,21 +94,18 @@ rmm::device_buffer decompress_stripe_data( std::size_t num_uncompressed_blocks = 0; std::size_t total_decomp_size = 0; - auto const num_streams = stream_range.end - stream_range.begin; - cudf::detail::hostdevice_vector compinfo(0, num_streams, stream); - for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - compinfo.push_back(gpu::CompressedStreamInfo( + auto& stream_comp_info = compinfo[stream_idx - stream_range.begin]; + stream_comp_info = gpu::CompressedStreamInfo( static_cast( stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + info.dst_pos, - info.length)); + info.length); if (compinfo_ready) { auto const& cached_comp_info = compinfo_map.at(info.source); - auto& stream_comp_info = compinfo.back(); stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; @@ -759,15 +758,35 @@ void reader_impl::decompress_and_decode(read_mode mode) // Column descriptors ('chunks'). // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level. + // Unfortunately we cannot create one hostdevice_vector to use for all levels because + // currently we do not have hostdevice_2dspan exists. std::vector> lvl_chunks(num_levels); // For computing null count. std::vector>> null_count_prefix_sums(num_levels); + // For parsing decompression data. + // We create one hostdevice_vector that is large enough to use for all levels, + // thus only need to allocate memory once. + auto hd_compinfo = [&] { + std::size_t max_num_streams{0}; + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + // Find the maximum number of streams in all levels of the decoding stripes. 
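// The buffer-reuse pattern above in plain terms: size one allocation for the worst
// case across levels, then give each level a prefix view of that storage. Plain
// pointers stand in for cudf's hostdevice_vector/hostdevice_span in this sketch.
#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
struct view_sketch {
  T* data;
  std::size_t size;
};

template <typename T>
view_sketch<T> level_view(std::vector<T>& shared, std::size_t level_count)
{
  assert(level_count <= shared.size());  // shared was sized to the maximum level
  return {shared.data(), level_count};
}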
+ for (std::size_t level = 0; level < num_levels; ++level) { + auto const stream_range = + get_range(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range); + auto const num_streams = stream_range.end - stream_range.begin; + max_num_streams = std::max(max_num_streams, num_streams); + } + } + return cudf::detail::hostdevice_vector{max_num_streams, _stream}; + }(); + auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); + auto const num_streams = stream_range.end - stream_range.begin; auto const& columns_level = _selected_columns.levels[level]; auto const& stream_info = _file_itm_data.lvl_stream_info[level]; @@ -922,9 +941,12 @@ void reader_impl::decompress_and_decode(read_mode mode) // Setup row group descriptors if using indexes. if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + auto compinfo = cudf::detail::hostdevice_span( + hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); auto decomp_data = decompress_stripe_data(load_stripe_range, stream_range, stripe_count, + compinfo, _file_itm_data.compinfo_map, *_metadata.per_file_metadata[0].decompressor, stripe_data, From 33c92a97edca3472700e0f20a0be5a7c73d41251 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Mar 2024 16:23:02 -0700 Subject: [PATCH 276/321] Use only one `device_buffer` for storing all stripe data Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 45 ++++++++----- cpp/src/io/orc/reader_impl_chunking.hpp | 12 ++-- cpp/src/io/orc/reader_impl_decode.cu | 85 +++++++++++++------------ 3 files changed, 80 insertions(+), 62 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 406e9558dae..acd62b874af 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -274,14 +274,12 @@ void reader_impl::global_preprocess(read_mode mode) auto& stripe_data_read_ranges = _file_itm_data.stripe_data_read_ranges; stripe_data_read_ranges.resize(num_total_stripes); - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; auto& lvl_column_types = _file_itm_data.lvl_column_types; auto& lvl_nested_cols = _file_itm_data.lvl_nested_cols; - lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); @@ -462,22 +460,28 @@ void reader_impl::load_data(read_mode mode) auto const stripe_start = load_stripe_range.begin; auto const stripe_end = load_stripe_range.end; auto const stripe_count = stripe_end - stripe_start; + auto const num_levels = _selected_columns.num_levels(); - auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; - auto const num_levels = _selected_columns.num_levels(); + auto& stripe_data_offsets = _file_itm_data.stripe_data_offsets; + stripe_data_offsets.resize(0); + stripe_data_offsets.reserve(num_levels * stripe_count); + stripe_data_offsets.push_back(0); + std::size_t offset{0}; - // Prepare the buffer to read raw data onto. + // Compute the offsets for the memory segments storing data of each stripe. 
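// The level-major indexing used for stripe_data_offsets below, isolated: a single
// offsets array addresses every (level, stripe) segment of the one backing buffer.
#include <cstddef>

inline std::size_t segment_offset_index(std::size_t level,
                                        std::size_t stripe_count,
                                        std::size_t stripe_idx,
                                        std::size_t stripe_start)
{
  return level * stripe_count + (stripe_idx - stripe_start);
}
// offsets[i] is a segment's begin and offsets[i + 1] - offsets[i] its size, so the
// array holds num_levels * stripe_count + 1 entries, counting the leading zero.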
for (std::size_t level = 0; level < num_levels; ++level) { - auto& stripe_data = lvl_stripe_data[level]; - stripe_data.resize(stripe_count); - for (std::size_t idx = 0; idx < stripe_count; ++idx) { - auto const stripe_size = _file_itm_data.lvl_stripe_sizes[level][idx + stripe_start]; - stripe_data[idx] = rmm::device_buffer( - cudf::util::round_up_safe(stripe_size, BUFFER_PADDING_MULTIPLE), _stream); + auto const stripe_size = _file_itm_data.lvl_stripe_sizes[level][idx + stripe_start]; + auto const stripe_data_size = cudf::util::round_up_safe(stripe_size, BUFFER_PADDING_MULTIPLE); + offset += stripe_data_size; + stripe_data_offsets.push_back(offset); } } + // Now we have the total data size of all stripes. Just create one buffer to load all data into. + auto& stripe_data = _file_itm_data.stripe_data; + stripe_data = rmm::device_buffer(stripe_data_offsets.back(), _stream); + // // Load stripe data into memory: // @@ -497,8 +501,11 @@ void reader_impl::load_data(read_mode mode) for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read_info = _file_itm_data.data_read_info[read_idx]; auto const source_ptr = _metadata.per_file_metadata[read_info.source_idx].source; - auto const dst_base = static_cast( - lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); + + // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. + auto const offset_idx = read_info.level * stripe_count + read_info.stripe_idx - stripe_start; + auto const stripe_offset = stripe_data_offsets[offset_idx]; + auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; if (source_ptr->is_device_read_preferred(read_info.length)) { read_tasks.push_back( @@ -633,8 +640,9 @@ void reader_impl::load_data(read_mode mode) auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); - auto& stripe_data = lvl_stripe_data[level]; - if (stripe_data.empty()) { continue; } + auto const level_data_size = + stripe_data_offsets[(level + 1) * stripe_count] - stripe_data_offsets[level * stripe_count]; + if (level_data_size == 0) { continue; } // Range of all streams in the loaded stripes. auto const stream_range = @@ -648,8 +656,11 @@ void reader_impl::load_data(read_mode mode) hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - auto const dst_base = - static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); + + // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. + auto const offset_idx = level * stripe_count + info.source.stripe_idx - stripe_start; + auto const stripe_offset = stripe_data_offsets[offset_idx]; + auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; compinfo[stream_idx - stream_range.begin] = gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length); } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1f368e1211b..cdc6faaf172 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -143,6 +143,13 @@ struct file_intermediate_data { // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. std::vector stripe_data_read_ranges; + // The buffers to store raw data read from disk, initialized each time calling to `load_data()`. 
+ // After decoding, such buffers can be released. + rmm::device_buffer stripe_data; + + // Offsets into the buffer `stripe_data` for each loaded stripe. + std::vector stripe_data_offsets; + // Store the compression information for each data stream. stream_source_map compinfo_map; @@ -153,11 +160,6 @@ struct file_intermediate_data { // This is used to identify the range of streams for each stripe from that vector. std::vector> lvl_stripe_stream_ranges; - // The buffers to store raw data read from disk, initialized for each reading stripe chunks. - // After decoding, such buffers can be released. - // This can only be implemented after chunked output is ready. - std::vector> lvl_stripe_data; - // Store the size of each stripe at each nested level. // This is used to initialize the stripe_data buffers. std::vector> lvl_stripe_sizes; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2221ef66fa1..948925b50c9 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -62,7 +62,7 @@ namespace { * @param num_decode_stripes Number of stripes that the decoding streams belong to * @param compinfo_map A map to lookup compression info of streams * @param decompressor Block decompressor - * @param stripe_data List of source stripe column data + * @param stripe_data Stripe column data * @param stream_info List of stream to column mappings * @param chunks Vector of list of column chunk descriptors * @param row_groups Vector of list of row index descriptors @@ -70,6 +70,8 @@ namespace { * @param use_base_stride Whether to use base stride obtained from meta or use the computed value * @param stream CUDA stream used for device memory operations and kernel launches * @return Device buffer to decompressed data + * + * // TODO: add missing params */ rmm::device_buffer decompress_stripe_data( range const& loaded_stripe_range, @@ -78,7 +80,9 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_span compinfo, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, - host_span stripe_data, + device_span stripe_data, + host_span stripe_data_offsets, + std::size_t offset_idx_start, host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, cudf::detail::hostdevice_2dvector& row_groups, @@ -98,11 +102,11 @@ rmm::device_buffer decompress_stripe_data( auto const& info = stream_info[stream_idx]; auto& stream_comp_info = compinfo[stream_idx - stream_range.begin]; - stream_comp_info = gpu::CompressedStreamInfo( - static_cast( - stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + - info.dst_pos, - info.length); + + auto const offset_idx = offset_idx_start + info.source.stripe_idx - loaded_stripe_range.begin; + auto const stripe_offset = stripe_data_offsets[offset_idx]; + auto const dst_base = &stripe_data.data()[stripe_offset]; + stream_comp_info = gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length); if (compinfo_ready) { auto const& cached_comp_info = compinfo_map.at(info.source); @@ -725,6 +729,7 @@ void reader_impl::decompress_and_decode(read_mode mode) // The start index of loaded stripes. They are different from decoding stripes. 
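// How the two kinds of ranges relate: each decode range is a subrange of the
// currently loaded range, so buffer lookups translate a global stripe index into
// a load-relative one. A sketch with an illustrative range type:
#include <cassert>
#include <cstddef>

struct range_sketch { std::size_t begin, end; };

inline std::size_t load_relative_index(std::size_t global_stripe_idx, range_sketch loaded)
{
  assert(loaded.begin <= global_stripe_idx && global_stripe_idx < loaded.end);
  return global_stripe_idx - loaded.begin;
}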
auto const load_stripe_range = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1]; + auto const load_stripe_count = load_stripe_range.end - load_stripe_range.begin; auto const load_stripe_start = load_stripe_range.begin; auto const rows_to_skip = _file_itm_data.rows_to_skip; @@ -782,7 +787,13 @@ void reader_impl::decompress_and_decode(read_mode mode) return cudf::detail::hostdevice_vector{max_num_streams, _stream}; }(); - auto& col_meta = *_col_meta; + auto const& stripe_data_offsets = _file_itm_data.stripe_data_offsets; + auto const& stripe_data = _file_itm_data.stripe_data; + auto& col_meta = *_col_meta; + + // To store the output decompressed buffers, which need to be kept alive until we decode them. + std::vector decompressed_buffers; + for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); @@ -793,8 +804,7 @@ void reader_impl::decompress_and_decode(read_mode mode) auto const& column_types = _file_itm_data.lvl_column_types[level]; auto const& nested_cols = _file_itm_data.lvl_nested_cols[level]; - auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; - auto& chunks = lvl_chunks[level]; + auto& chunks = lvl_chunks[level]; auto const num_level_columns = columns_level.size(); chunks = @@ -852,8 +862,10 @@ void reader_impl::decompress_and_decode(read_mode mode) CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - auto const dst_base = - static_cast(stripe_data[stripe_idx - load_stripe_start].data()); + // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. + auto const offset_idx = level * load_stripe_count + stripe_idx - load_stripe_start; + auto const stripe_offset = stripe_data_offsets[offset_idx]; + auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; auto const num_rows_in_stripe = static_cast(stripe_info->numberOfRows); uint32_t const rowgroup_id = num_rowgroups; @@ -917,7 +929,9 @@ void reader_impl::decompress_and_decode(read_mode mode) num_rowgroups += stripe_num_rowgroups; } - if (stripe_data.empty()) { continue; } + auto const level_data_size = + stripe_data_offsets[(level + 1) * stripe_count] - stripe_data_offsets[level * stripe_count]; + if (level_data_size == 0) { continue; } // Process dataset chunks into output columns. 
auto row_groups = @@ -943,26 +957,26 @@ void reader_impl::decompress_and_decode(read_mode mode) if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto compinfo = cudf::detail::hostdevice_span( hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); - auto decomp_data = decompress_stripe_data(load_stripe_range, - stream_range, - stripe_count, - compinfo, - _file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - stripe_data, - stream_info, - chunks, - row_groups, - _metadata.get_row_index_stride(), - level == 0, - _stream); + auto decomp_data = decompress_stripe_data( + load_stripe_range, + stream_range, + stripe_count, + compinfo, + _file_itm_data.compinfo_map, + *_metadata.per_file_metadata[0].decompressor, + device_span{static_cast(stripe_data.data()), + stripe_data.size()}, + stripe_data_offsets, + level * load_stripe_count, + stream_info, + chunks, + row_groups, + _metadata.get_row_index_stride(), + level == 0, + _stream); // Just save the decompressed data and clear out the raw data to free up memory. - stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); - for (std::size_t i = 1; i < stripe_count; ++i) { - stripe_data[i + stripe_start - load_stripe_start] = {}; - } - + decompressed_buffers.emplace_back(std::move(decomp_data)); } else { if (row_groups.size().first) { chunks.host_to_device_async(_stream); @@ -1049,15 +1063,6 @@ void reader_impl::decompress_and_decode(read_mode mode) // Free up temp memory used for decoding. for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { _out_buffers[level].resize(0); - - auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; - if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { - stripe_data[stripe_start - load_stripe_start] = {}; - } else { - for (std::size_t i = 0; i < stripe_count; ++i) { - stripe_data[i + stripe_start - load_stripe_start] = {}; - } - } } // Output table range is reset to start from the first position. From 0a16bb433fa8b40e736dfa8709e20c9517913d0a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sat, 30 Mar 2024 23:21:51 -0700 Subject: [PATCH 277/321] Revert "Use only one `device_buffer` for storing all stripe data" This reverts commit 33c92a97edca3472700e0f20a0be5a7c73d41251. 
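For context, the reverted scheme replaced the per-stripe `rmm::device_buffer`s with one allocation and located each (level, stripe) slice through a prefix sum of padded stripe sizes, flattened level-major. A minimal host-side sketch of that bookkeeping follows; `round_up`, `compute_offsets`, and `offset_index` are illustrative stand-ins, not cudf APIs.

#include <cstddef>
#include <vector>

// Round `size` up to the next multiple of `align`; stands in for
// cudf::util::round_up_safe in this sketch.
std::size_t round_up(std::size_t size, std::size_t align)
{
  return (size + align - 1) / align * align;
}

// sizes[level][stripe] is the raw byte size of one stripe's data at one
// nested level. The result has one extra element; back() is the total
// buffer size to allocate.
std::vector<std::size_t> compute_offsets(std::vector<std::vector<std::size_t>> const& sizes,
                                         std::size_t padding)
{
  std::vector<std::size_t> offsets{0};
  for (auto const& level_sizes : sizes) {
    for (auto const size : level_sizes) {
      offsets.push_back(offsets.back() + round_up(size, padding));
    }
  }
  return offsets;
}

// Level-major flattening: one offset slot per (level, stripe) pair, which is
// the `level * stripe_count + stripe_idx - stripe_start` indexing seen in the
// hunks below.
std::size_t offset_index(std::size_t level, std::size_t stripe_count, std::size_t stripe_idx)
{
  return level * stripe_count + stripe_idx;
}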
--- cpp/src/io/orc/reader_impl_chunking.cu | 45 +++++-------- cpp/src/io/orc/reader_impl_chunking.hpp | 12 ++-- cpp/src/io/orc/reader_impl_decode.cu | 85 ++++++++++++------------- 3 files changed, 62 insertions(+), 80 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index acd62b874af..406e9558dae 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -274,12 +274,14 @@ void reader_impl::global_preprocess(read_mode mode) auto& stripe_data_read_ranges = _file_itm_data.stripe_data_read_ranges; stripe_data_read_ranges.resize(num_total_stripes); + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; auto& lvl_stripe_sizes = _file_itm_data.lvl_stripe_sizes; auto& lvl_stream_info = _file_itm_data.lvl_stream_info; auto& lvl_stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges; auto& lvl_column_types = _file_itm_data.lvl_column_types; auto& lvl_nested_cols = _file_itm_data.lvl_nested_cols; + lvl_stripe_data.resize(num_levels); lvl_stripe_sizes.resize(num_levels); lvl_stream_info.resize(num_levels); lvl_stripe_stream_ranges.resize(num_levels); @@ -460,28 +462,22 @@ void reader_impl::load_data(read_mode mode) auto const stripe_start = load_stripe_range.begin; auto const stripe_end = load_stripe_range.end; auto const stripe_count = stripe_end - stripe_start; - auto const num_levels = _selected_columns.num_levels(); - auto& stripe_data_offsets = _file_itm_data.stripe_data_offsets; - stripe_data_offsets.resize(0); - stripe_data_offsets.reserve(num_levels * stripe_count); - stripe_data_offsets.push_back(0); - std::size_t offset{0}; + auto& lvl_stripe_data = _file_itm_data.lvl_stripe_data; + auto const num_levels = _selected_columns.num_levels(); - // Compute the offsets for the memory segments storing data of each stripe. + // Prepare the buffer to read raw data onto. for (std::size_t level = 0; level < num_levels; ++level) { + auto& stripe_data = lvl_stripe_data[level]; + stripe_data.resize(stripe_count); + for (std::size_t idx = 0; idx < stripe_count; ++idx) { - auto const stripe_size = _file_itm_data.lvl_stripe_sizes[level][idx + stripe_start]; - auto const stripe_data_size = cudf::util::round_up_safe(stripe_size, BUFFER_PADDING_MULTIPLE); - offset += stripe_data_size; - stripe_data_offsets.push_back(offset); + auto const stripe_size = _file_itm_data.lvl_stripe_sizes[level][idx + stripe_start]; + stripe_data[idx] = rmm::device_buffer( + cudf::util::round_up_safe(stripe_size, BUFFER_PADDING_MULTIPLE), _stream); } } - // Now we have the total data size of all stripes. Just create one buffer to load all data into. - auto& stripe_data = _file_itm_data.stripe_data; - stripe_data = rmm::device_buffer(stripe_data_offsets.back(), _stream); - // // Load stripe data into memory: // @@ -501,11 +497,8 @@ void reader_impl::load_data(read_mode mode) for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read_info = _file_itm_data.data_read_info[read_idx]; auto const source_ptr = _metadata.per_file_metadata[read_info.source_idx].source; - - // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. 
- auto const offset_idx = read_info.level * stripe_count + read_info.stripe_idx - stripe_start; - auto const stripe_offset = stripe_data_offsets[offset_idx]; - auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; + auto const dst_base = static_cast( + lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); if (source_ptr->is_device_read_preferred(read_info.length)) { read_tasks.push_back( @@ -640,9 +633,8 @@ void reader_impl::load_data(read_mode mode) auto const& stream_info = _file_itm_data.lvl_stream_info[level]; auto const num_columns = _selected_columns.levels[level].size(); - auto const level_data_size = - stripe_data_offsets[(level + 1) * stripe_count] - stripe_data_offsets[level * stripe_count]; - if (level_data_size == 0) { continue; } + auto& stripe_data = lvl_stripe_data[level]; + if (stripe_data.empty()) { continue; } // Range of all streams in the loaded stripes. auto const stream_range = @@ -656,11 +648,8 @@ void reader_impl::load_data(read_mode mode) hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) { auto const& info = stream_info[stream_idx]; - - // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. - auto const offset_idx = level * stripe_count + info.source.stripe_idx - stripe_start; - auto const stripe_offset = stripe_data_offsets[offset_idx]; - auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; + auto const dst_base = + static_cast(stripe_data[info.source.stripe_idx - stripe_start].data()); compinfo[stream_idx - stream_range.begin] = gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length); } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index cdc6faaf172..1f368e1211b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -143,13 +143,6 @@ struct file_intermediate_data { // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. std::vector stripe_data_read_ranges; - // The buffers to store raw data read from disk, initialized each time calling to `load_data()`. - // After decoding, such buffers can be released. - rmm::device_buffer stripe_data; - - // Offsets into the buffer `stripe_data` for each loaded stripe. - std::vector stripe_data_offsets; - // Store the compression information for each data stream. stream_source_map compinfo_map; @@ -160,6 +153,11 @@ struct file_intermediate_data { // This is used to identify the range of streams for each stripe from that vector. std::vector> lvl_stripe_stream_ranges; + // The buffers to store raw data read from disk, initialized for each reading stripe chunks. + // After decoding, such buffers can be released. + // This can only be implemented after chunked output is ready. + std::vector> lvl_stripe_data; + // Store the size of each stripe at each nested level. // This is used to initialize the stripe_data buffers. 
std::vector> lvl_stripe_sizes; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 948925b50c9..2221ef66fa1 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -62,7 +62,7 @@ namespace { * @param num_decode_stripes Number of stripes that the decoding streams belong to * @param compinfo_map A map to lookup compression info of streams * @param decompressor Block decompressor - * @param stripe_data Stripe column data + * @param stripe_data List of source stripe column data * @param stream_info List of stream to column mappings * @param chunks Vector of list of column chunk descriptors * @param row_groups Vector of list of row index descriptors @@ -70,8 +70,6 @@ namespace { * @param use_base_stride Whether to use base stride obtained from meta or use the computed value * @param stream CUDA stream used for device memory operations and kernel launches * @return Device buffer to decompressed data - * - * // TODO: add missing params */ rmm::device_buffer decompress_stripe_data( range const& loaded_stripe_range, @@ -80,9 +78,7 @@ rmm::device_buffer decompress_stripe_data( cudf::detail::hostdevice_span compinfo, stream_source_map const& compinfo_map, OrcDecompressor const& decompressor, - device_span stripe_data, - host_span stripe_data_offsets, - std::size_t offset_idx_start, + host_span stripe_data, host_span stream_info, cudf::detail::hostdevice_2dvector& chunks, cudf::detail::hostdevice_2dvector& row_groups, @@ -102,11 +98,11 @@ rmm::device_buffer decompress_stripe_data( auto const& info = stream_info[stream_idx]; auto& stream_comp_info = compinfo[stream_idx - stream_range.begin]; - - auto const offset_idx = offset_idx_start + info.source.stripe_idx - loaded_stripe_range.begin; - auto const stripe_offset = stripe_data_offsets[offset_idx]; - auto const dst_base = &stripe_data.data()[stripe_offset]; - stream_comp_info = gpu::CompressedStreamInfo(dst_base + info.dst_pos, info.length); + stream_comp_info = gpu::CompressedStreamInfo( + static_cast( + stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + + info.dst_pos, + info.length); if (compinfo_ready) { auto const& cached_comp_info = compinfo_map.at(info.source); @@ -729,7 +725,6 @@ void reader_impl::decompress_and_decode(read_mode mode) // The start index of loaded stripes. They are different from decoding stripes. auto const load_stripe_range = _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range - 1]; - auto const load_stripe_count = load_stripe_range.end - load_stripe_range.begin; auto const load_stripe_start = load_stripe_range.begin; auto const rows_to_skip = _file_itm_data.rows_to_skip; @@ -787,13 +782,7 @@ void reader_impl::decompress_and_decode(read_mode mode) return cudf::detail::hostdevice_vector{max_num_streams, _stream}; }(); - auto const& stripe_data_offsets = _file_itm_data.stripe_data_offsets; - auto const& stripe_data = _file_itm_data.stripe_data; - auto& col_meta = *_col_meta; - - // To store the output decompressed buffers, which need to be kept alive until we decode them. 
- std::vector decompressed_buffers; - + auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; auto const stream_range = get_range(stripe_stream_ranges, stripe_range); @@ -804,7 +793,8 @@ void reader_impl::decompress_and_decode(read_mode mode) auto const& column_types = _file_itm_data.lvl_column_types[level]; auto const& nested_cols = _file_itm_data.lvl_nested_cols[level]; - auto& chunks = lvl_chunks[level]; + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; + auto& chunks = lvl_chunks[level]; auto const num_level_columns = columns_level.size(); chunks = @@ -862,10 +852,8 @@ void reader_impl::decompress_and_decode(read_mode mode) CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0, "Invalid index rowgroup stream data"); - // `offset_idx` is the flattened index of the stripe offset in `stripe_data_offsets`. - auto const offset_idx = level * load_stripe_count + stripe_idx - load_stripe_start; - auto const stripe_offset = stripe_data_offsets[offset_idx]; - auto const dst_base = static_cast(stripe_data.data()) + stripe_offset; + auto const dst_base = + static_cast(stripe_data[stripe_idx - load_stripe_start].data()); auto const num_rows_in_stripe = static_cast(stripe_info->numberOfRows); uint32_t const rowgroup_id = num_rowgroups; @@ -929,9 +917,7 @@ void reader_impl::decompress_and_decode(read_mode mode) num_rowgroups += stripe_num_rowgroups; } - auto const level_data_size = - stripe_data_offsets[(level + 1) * stripe_count] - stripe_data_offsets[level * stripe_count]; - if (level_data_size == 0) { continue; } + if (stripe_data.empty()) { continue; } // Process dataset chunks into output columns. auto row_groups = @@ -957,26 +943,26 @@ void reader_impl::decompress_and_decode(read_mode mode) if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { auto compinfo = cudf::detail::hostdevice_span( hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams); - auto decomp_data = decompress_stripe_data( - load_stripe_range, - stream_range, - stripe_count, - compinfo, - _file_itm_data.compinfo_map, - *_metadata.per_file_metadata[0].decompressor, - device_span{static_cast(stripe_data.data()), - stripe_data.size()}, - stripe_data_offsets, - level * load_stripe_count, - stream_info, - chunks, - row_groups, - _metadata.get_row_index_stride(), - level == 0, - _stream); + auto decomp_data = decompress_stripe_data(load_stripe_range, + stream_range, + stripe_count, + compinfo, + _file_itm_data.compinfo_map, + *_metadata.per_file_metadata[0].decompressor, + stripe_data, + stream_info, + chunks, + row_groups, + _metadata.get_row_index_stride(), + level == 0, + _stream); // Just save the decompressed data and clear out the raw data to free up memory. - decompressed_buffers.emplace_back(std::move(decomp_data)); + stripe_data[stripe_start - load_stripe_start] = std::move(decomp_data); + for (std::size_t i = 1; i < stripe_count; ++i) { + stripe_data[i + stripe_start - load_stripe_start] = {}; + } + } else { if (row_groups.size().first) { chunks.host_to_device_async(_stream); @@ -1063,6 +1049,15 @@ void reader_impl::decompress_and_decode(read_mode mode) // Free up temp memory used for decoding. 
for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { _out_buffers[level].resize(0); + + auto& stripe_data = _file_itm_data.lvl_stripe_data[level]; + if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) { + stripe_data[stripe_start - load_stripe_start] = {}; + } else { + for (std::size_t i = 0; i < stripe_count; ++i) { + stripe_data[i + stripe_start - load_stripe_start] = {}; + } + } } // Output table range is reset to start from the first position. From 3f8a2202c047434ef6be42f32a5a4ded92c31fa6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Apr 2024 21:29:33 -0700 Subject: [PATCH 278/321] Revert changes to `reader` class Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 32b28692140..bf042b35fe0 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -44,7 +44,7 @@ class reader_impl; * @brief Class to read ORC dataset data into columns. */ class reader { - protected: + private: std::unique_ptr _impl; public: @@ -64,7 +64,7 @@ class reader { /** * @brief Destructor explicitly declared to avoid inlining in header */ - virtual ~reader(); + ~reader(); /** * @brief Reads the entire dataset. @@ -78,6 +78,7 @@ class reader { * @brief The reader class that supports iterative reading from an array of data sources. */ class chunked_reader { + private: std::unique_ptr _impl; public: From cbb3858b85bc8aa4eba17a868170680611c6c8ca Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Apr 2024 22:01:42 -0700 Subject: [PATCH 279/321] Use byte count instead of bit count Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_decode.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2221ef66fa1..cd22a9e3703 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -692,7 +692,7 @@ std::vector find_table_splits(table_view const& input, // the last segment may be shorter than the others. auto const current_length = cuda::std::min(segment_length, num_rows - segment_length * segment_idx); - auto const size = d_sizes[segment_idx]; + auto const size = d_sizes[segment_idx] / CHAR_BIT; // divide by CHAR_BIT to get size in bytes return cumulative_size{static_cast(current_length), static_cast(size)}; }); @@ -704,8 +704,7 @@ std::vector find_table_splits(table_view const& input, cumulative_size_sum{}); segmented_sizes.device_to_host_sync(stream); - // Since the segment sizes are in bits, we need to multiply CHAR_BIT with the output limit. 
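// Arithmetic note on this change: the old code scaled the limit up to bits
// (`size_limit * CHAR_BIT`, deleted below), while the new code scales each
// segment size down to bytes at the point it is stored (the `/ CHAR_BIT`
// division above). With integer math these differ only by rounding: each
// per-segment division floors away up to CHAR_BIT - 1 bits, e.g.
//
//   4097 /* bits */ / CHAR_BIT == 512  /* bytes, not 513 */
//
// so the byte-based cumulative sums can undercount by a fraction of a byte
// per segment, which is negligible at row-group granularity.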
-  return find_splits<cumulative_size>(segmented_sizes, input.num_rows(), size_limit * CHAR_BIT);
+  return find_splits<cumulative_size>(segmented_sizes, input.num_rows(), size_limit);
 }

}  // namespace

From 1c62ba790fa003717879031b423b44de046947cd Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 1 Apr 2024 22:01:54 -0700
Subject: [PATCH 280/321] Change bench limits

---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index e710219852e..9ddcaeb36e4 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -188,6 +188,8 @@ NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compr
   .add_int64_axis("cardinality", {0, 1000})
   .add_int64_axis("run_length", {1, 32});

+std::size_t constexpr MB_bytes{1024 * 1024};
+
 // Should have the same parameters as `BM_orc_read_io_compression` for comparison.
 NVBENCH_BENCH_TYPES(BM_orc_chunked_read_io_compression,
                     NVBENCH_TYPE_AXES(io_list, compression_list))
@@ -196,5 +198,6 @@ NVBENCH_BENCH_TYPES(BM_orc_chunked_read_io_compression,
   .set_min_samples(4)
   .add_int64_axis("cardinality", {0, 1000})
   .add_int64_axis("run_length", {1, 32})
-  .add_int64_axis("output_limit", {0, 500'000})
-  .add_int64_axis("read_limit", {0, 500'000});
+  // The input has approximately 520MB and 127K rows.
- .add_int64_axis("output_limit", {100 * MB_bytes, 500 * MB_bytes}) - .add_int64_axis("read_limit", {100 * MB_bytes, 500 * MB_bytes}); + .add_int64_axis("output_limit", {100 * Mbytes, 500 * Mbytes}) + .add_int64_axis("read_limit", {100 * Mbytes, 500 * Mbytes}); From c426e4c1a98529383be9ee812fca46104d44f4b3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Apr 2024 10:16:49 -0700 Subject: [PATCH 283/321] Fix/add comment and cleanup Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 2 +- cpp/tests/io/orc_chunked_reader_test.cu | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index a28011feb8f..f1d20cc2094 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -477,7 +477,7 @@ class chunked_orc_reader { * @brief Construct the reader from input/output size limits along with other ORC reader options. * * This constructor implicitly call the other constructor with `output_row_granularity` set to - * 10'000 rows. + * `DEFAULT_OUTPUT_ROW_GRANULARITY` rows. * * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, * or `0` if there is no limit diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 39450bb2a9f..59727c6d5fc 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -44,9 +44,6 @@ #include -#include -#include - namespace { enum class output_limit : std::size_t {}; enum class input_limit : std::size_t {}; @@ -1413,6 +1410,7 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, read_result->view()); } + // The test below requires a huge amount of memory, thus it is disabled by default. #ifdef LOCAL_TEST // Read with only output limit -- there is no limit on the memory usage. // However, the reader should be able to detect and load only enough stripes each time From faea7bc6922393f24b4bc67ed0e62a06529f7557 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Apr 2024 10:21:08 -0700 Subject: [PATCH 284/321] Use pointers instead of optionals Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 27 ++++++++++++------------- cpp/src/io/orc/reader_impl_chunking.hpp | 4 ++-- cpp/src/io/orc/reader_impl_decode.cu | 2 +- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 406e9558dae..c1209569285 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -47,10 +47,10 @@ std::size_t gather_stream_info_and_column_desc( bool apply_struct_map, int64_t* num_dictionary_entries, std::size_t* local_stream_order, - std::optional*> const& stream_info, - std::optional*> const& chunks) + std::vector* stream_info, + cudf::detail::hostdevice_2dvector* chunks) { - CUDF_EXPECTS(stream_info.has_value() ^ chunks.has_value(), + CUDF_EXPECTS((stream_info == nullptr) ^ (chunks == nullptr), "Either stream_info or chunks must be provided, but not both."); std::size_t src_offset = 0; @@ -92,8 +92,8 @@ std::size_t gather_stream_info_and_column_desc( auto const child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; if (child_idx >= 0) { col = child_idx; - if (chunks.has_value()) { - auto& chunk = (*chunks.value())[stripe_order][col]; + if (chunks) { + auto& chunk = (*chunks)[stripe_order][col]; chunk.strm_id[gpu::CI_PRESENT] = *local_stream_order; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } @@ -101,11 +101,11 @@ std::size_t gather_stream_info_and_column_desc( } } } else if (col != -1) { - if (chunks.has_value()) { + if (chunks) { if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks.value())[stripe_order][col]; + auto& chunk = (*chunks)[stripe_order][col]; chunk.strm_id[index_type] = *local_stream_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams @@ -121,12 +121,11 @@ std::size_t gather_stream_info_and_column_desc( } (*local_stream_order)++; - } else { // not chunks.has_value() - stream_info.value()->emplace_back( - stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_source_info{stripe_order, level, column_id, stream.kind}); + } else { // chunks == nullptr + stream_info->emplace_back(stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_source_info{stripe_order, level, column_id, stream.kind}); } dst_offset += stream.length; @@ -381,7 +380,7 @@ void reader_impl::global_preprocess(read_mode mode) nullptr, // num_dictionary_entries nullptr, // local_stream_order &stream_info, - std::nullopt // chunks + nullptr // chunks ); auto const is_stripe_data_empty = stripe_level_size == 0; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 1f368e1211b..4fcb75b89b6 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -325,7 +325,7 @@ std::size_t gather_stream_info_and_column_desc( bool apply_struct_map, int64_t* num_dictionary_entries, std::size_t* local_stream_order, - std::optional*> const& stream_info, - std::optional*> const& chunks); + std::vector* stream_info, + cudf::detail::hostdevice_2dvector* chunks); } // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index cd22a9e3703..602a73ff78d 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -844,7 +844,7 @@ void reader_impl::decompress_and_decode(read_mode mode) level == 0, &num_dict_entries, &local_stream_order, - std::nullopt, // stream_info + nullptr, // stream_info &chunks); auto const is_stripe_data_empty = total_data_size == 0; From 7bfcdf536a7dddc6b54632de82e8d9f0ff473045 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Apr 2024 14:01:34 -0700 Subject: [PATCH 285/321] Require `output_row_granularity` to be positive all the time Signed-off-by: Nghia Truong --- cpp/include/cudf/io/orc.hpp | 4 +++- cpp/src/io/orc/reader.cu | 5 ----- cpp/src/io/orc/reader_impl.cu | 5 +---- cpp/src/io/orc/reader_impl_chunking.hpp | 2 ++ 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index f1d20cc2094..ac7d086c950 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -464,6 +464,8 @@ class chunked_orc_reader { * @param options Settings for controlling reading behaviors * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource 
to use for device memory allocation + * + * @throw cudf::logic_error if `output_row_granularity` is non-positive */ explicit chunked_orc_reader( std::size_t output_size_limit, @@ -498,7 +500,7 @@ class chunked_orc_reader { * @brief Construct the reader from output size limits along with other ORC reader options. * * This constructor implicitly call the other constructor with `data_read_limit` set to `0` and - * `output_row_granularity` set to 10'000 rows. + * `output_row_granularity` set to `DEFAULT_OUTPUT_ROW_GRANULARITY` rows. * * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, * or `0` if there is no limit diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 37eb7ab0fd7..006e70467b5 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -58,11 +58,6 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, stream, mr)} { - // Although we internally accept non-positive value for `output_row_granularity` because we - // implicitly change such value into `DEFAULT_OUTPUT_ROW_GRANULARITY`. - // The user are not allowed to do so but instead required to specify an explicit positive number. - CUDF_EXPECTS(output_row_granularity > 0, - "The value of `output_row_granularity` must be positive."); } chunked_reader::~chunked_reader() = default; diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 566c8a059d8..f63fd1fbeef 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -177,10 +177,7 @@ reader_impl::reader_impl(std::size_t output_size_limit, _sources(std::move(sources)), _metadata{_sources, stream}, _selected_columns{_metadata.select_columns(options.get_columns())}, - _chunk_read_data{ - output_size_limit, - data_read_limit, - output_row_granularity > 0 ? output_row_granularity : DEFAULT_OUTPUT_ROW_GRANULARITY} + _chunk_read_data{output_size_limit, data_read_limit, output_row_granularity} { // Selected columns at different levels of nesting are stored in different elements // of `selected_columns`; thus, size == 1 means no nested columns. 
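The net effect of this patch: the positivity check moves out of the public constructor (deleted above) and into `chunk_read_data` itself (the hunk that follows), so every construction path is validated in one place. A stand-alone sketch of the pattern, using plain C++ stand-ins rather than the real cudf types (`chunk_read_data_sketch` is hypothetical):

#include <stdexcept>

// Enforce the invariant in the lowest-level type's constructor so that all
// higher-level entry points inherit the check automatically.
struct chunk_read_data_sketch {
  explicit chunk_read_data_sketch(int output_row_granularity_)
    : output_row_granularity{output_row_granularity_}
  {
    if (output_row_granularity <= 0) {
      throw std::logic_error("The value of `output_row_granularity` must be positive.");
    }
  }
  int output_row_granularity;
};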
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 4fcb75b89b6..7faedbcd399 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -185,6 +185,8 @@ struct chunk_read_data { data_read_limit{data_read_limit_}, output_row_granularity{output_row_granularity_} { + CUDF_EXPECTS(output_row_granularity > 0, + "The value of `output_row_granularity` must be positive."); } std::size_t const From a915f33a06e8e6ba146e71f2c82242f570addd9f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Apr 2024 15:52:24 -0700 Subject: [PATCH 286/321] Reorganize code, removing constructors Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 16 +++-- cpp/src/io/orc/reader_impl_chunking.hpp | 84 +++++++++---------------- 2 files changed, 42 insertions(+), 58 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index c1209569285..7a1fb3dfb48 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -122,10 +122,11 @@ std::size_t gather_stream_info_and_column_desc( (*local_stream_order)++; } else { // chunks == nullptr - stream_info->emplace_back(stripeinfo->offset + src_offset, - dst_offset, - stream.length, - stream_source_info{stripe_order, level, column_id, stream.kind}); + stream_info->emplace_back( + orc_stream_info{stripeinfo->offset + src_offset, + dst_offset, + stream.length, + stream_source_info{stripe_order, level, column_id, stream.kind}}); } dst_offset += stream.length; @@ -406,7 +407,12 @@ void reader_impl::global_preprocess(read_mode mode) len += stream_info[stream_level_count].length; stream_level_count++; } - read_info.emplace_back(offset, d_dst, len, stripe.source_idx, stripe_global_idx, level); + read_info.emplace_back(stream_data_read_info{offset, + d_dst, + len, + static_cast(stripe.source_idx), + stripe_global_idx, + level}); } } // end loop level diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 7faedbcd399..8e78514c72d 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -28,6 +28,34 @@ namespace cudf::io::orc::detail { +/** + * @brief Struct representing a range of data. + */ +struct range { + std::size_t begin{0}; + std::size_t end{0}; +}; + +// Store information to identify where to read a chunk of data from source. +// Each read corresponds to one or more consecutive streams combined. +struct stream_data_read_info { + uint64_t offset; // offset in data source + std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data + std::size_t length; // data length to read + std::size_t source_idx; // the data source id + std::size_t stripe_idx; // global stripe index + std::size_t level; // nested level +}; + +/** + * @brief Compression information for a stripe at a specific nested level. + */ +struct stripe_level_comp_info { + std::size_t num_compressed_blocks{0}; + std::size_t num_uncompressed_blocks{0}; + std::size_t total_decomp_size{0}; +}; + /** * @brief Struct that store source information of an ORC streams. */ @@ -66,13 +94,6 @@ using stream_source_map = * @brief Struct that store information of an ORC stream. 
*/ struct orc_stream_info { - explicit orc_stream_info(uint64_t offset_, - std::size_t dst_pos_, - uint32_t length_, - stream_source_info const& source_) - : offset(offset_), dst_pos(dst_pos_), length(length_), source(source_) - { - } // Data info: uint64_t offset; // offset in data source std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data @@ -82,23 +103,6 @@ struct orc_stream_info { stream_source_info source; }; -/** - * @brief Compression information for a stripe at a specific nested level. - */ -struct stripe_level_comp_info { - std::size_t num_compressed_blocks{0}; - std::size_t num_uncompressed_blocks{0}; - std::size_t total_decomp_size{0}; -}; - -/** - * @brief Struct representing a range of data. - */ -struct range { - std::size_t begin; - std::size_t end; -}; - /** * @brief Struct storing intermediate processing data loaded from data sources. */ @@ -110,39 +114,13 @@ struct file_intermediate_data { // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } - // Store information to identify where to read a chunk of data from source. - // Each read corresponds to one or more consecutive streams combined. - struct stream_data_read_info { - stream_data_read_info(uint64_t offset_, - std::size_t dst_pos_, - std::size_t length_, - std::size_t source_idx_, - std::size_t stripe_idx_, - std::size_t level_) - : offset(offset_), - dst_pos(dst_pos_), - length(length_), - source_idx(source_idx_), - stripe_idx(stripe_idx_), - level(level_) - { - } - - uint64_t offset; // offset in data source - std::size_t dst_pos; // offset to store data in memory relative to start of raw stripe data - std::size_t length; // data length to read - std::size_t source_idx; // the data source id - std::size_t stripe_idx; // global stripe index - std::size_t level; // nested level - }; - - // Identify what data to read from source. - std::vector data_read_info; - // For each stripe, we perform a number of read for its streams. // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. std::vector stripe_data_read_ranges; + // Identify what data to read from source. + std::vector data_read_info; + // Store the compression information for each data stream. 
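// Side note on the "removing constructors" part of this patch: with no
// user-declared constructors these structs become aggregates, so call sites
// can brace-initialize a temporary and move it in, which is exactly what the
// emplace_back hunks above switch to. Compact illustration with a
// hypothetical stand-in type:
//
//   struct read_info_sketch { std::uint64_t offset; std::size_t length; };
//   std::vector<read_info_sketch> reads;
//   reads.emplace_back(read_info_sketch{/*offset=*/128, /*length=*/4096});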
stream_source_map<stripe_level_comp_info> compinfo_map;

From 69d70c587e4b6b904b619ddeb33b548addeebfd3 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Wed, 3 Apr 2024 15:53:59 -0700
Subject: [PATCH 287/321] Rename functor

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 4 ++--
 cpp/src/io/orc/reader_impl_chunking.hpp | 2 +-
 cpp/src/io/orc/reader_impl_decode.cu    | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 7a1fb3dfb48..c5257f3dc13 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -444,7 +444,7 @@ void reader_impl::global_preprocess(read_mode mode)
                          total_stripe_sizes.d_begin(),
                          total_stripe_sizes.d_end(),
                          total_stripe_sizes.d_begin(),
-                         cumulative_size_sum{});
+                         cumulative_size_plus{});
   total_stripe_sizes.device_to_host_sync(_stream);

   auto const load_limit = [&] {
@@ -695,7 +695,7 @@ void reader_impl::load_data(read_mode mode)
                          stripe_decomp_sizes.d_begin(),
                          stripe_decomp_sizes.d_end(),
                          stripe_decomp_sizes.d_begin(),
-                         cumulative_size_sum{});
+                         cumulative_size_plus{});
   stripe_decomp_sizes.device_to_host_sync(_stream);

   auto const decode_limit = [&] {
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 8e78514c72d..58b67760b66 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -231,7 +231,7 @@ struct cumulative_size_and_row {
 /**
  * @brief Functor to sum up cumulative data.
  */
-struct cumulative_size_sum {
+struct cumulative_size_plus {
   __device__ cumulative_size operator()(cumulative_size const& a, cumulative_size const& b) const
   {
     return cumulative_size{a.count + b.count, a.size_bytes + b.size_bytes};
   }
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 602a73ff78d..455d96691e8 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -701,7 +701,7 @@ std::vector<range> find_table_splits(table_view const& input,
                          segmented_sizes.d_begin(),
                          segmented_sizes.d_end(),
                          segmented_sizes.d_begin(),
-                         cumulative_size_sum{});
+                         cumulative_size_plus{});
   segmented_sizes.device_to_host_sync(stream);

   return find_splits<cumulative_size>(segmented_sizes, input.num_rows(), size_limit);
 }

}  // namespace

From 4e94d531f0f52bf44e62c7f2ce849fe06eeb6cc7 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Wed, 3 Apr 2024 15:57:16 -0700
Subject: [PATCH 288/321] Using `host_span` instead of `const&`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 2 +-
 cpp/src/io/orc/reader_impl_chunking.hpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index c5257f3dc13..6c8f2c14adf 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -220,7 +220,7 @@ template std::vector<range> find_splits<cumulative_size>(host_span<cumulative_size const> sizes,
 template std::vector<range> find_splits<cumulative_size_and_row>(
   host_span<cumulative_size_and_row const> sizes, std::size_t total_count, std::size_t size_limit);

-range get_range(std::vector<range> const& input_ranges, range const& selected_ranges)
+range get_range(host_span<range const> input_ranges, range const& selected_ranges)
 {
   // The first and last range.
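// Put concretely: a "range of ranges" selects one contiguous span of the
// underlying flat array, from the begin of the first selected range to the
// end of the last. Host-side sketch (range_t and get_range_sketch are
// hypothetical stand-ins):
//
//   struct range_t { std::size_t begin, end; };
//   range_t get_range_sketch(std::vector<range_t> const& in, range_t sel)
//   {
//     return {in[sel.begin].begin, in[sel.end - 1].end};
//   }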
auto const& first_range = input_ranges[selected_ranges.begin]; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 3b61bc067a9..58b67760b66 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -268,7 +268,7 @@ std::vector find_splits(host_span cumulative_sizes, * @param selected_ranges A range of ranges from `input_ranges` * @return The range of data span by the selected range of ranges */ -range get_range(std::vector const& input_ranges, range const& selected_ranges); +range get_range(host_span input_ranges, range const& selected_ranges); /** * @brief Function that populates descriptors for either individual streams or chunks of column From 8c056541ae5e0df53234e132d2a5f396278a1a0e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 09:20:09 -0700 Subject: [PATCH 289/321] Use `device_async_resource_ref` --- cpp/include/cudf/io/detail/orc.hpp | 8 ++++---- cpp/include/cudf/io/orc.hpp | 8 ++++---- cpp/src/io/functions.cpp | 6 +++--- cpp/src/io/orc/reader.cu | 6 +++--- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/reader_impl.hpp | 8 ++++---- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 0000fc8a9c4..4d610891858 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -85,7 +85,7 @@ class chunked_reader { public: /** * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, size_type, - * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) * * @param sources Input `datasource` objects to read the dataset from */ @@ -95,10 +95,10 @@ class chunked_reader { std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::io::chunked_orc_reader::chunked_orc_reader(std::size_t, std::size_t, - * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) * * @param sources Input `datasource` objects to read the dataset from */ @@ -107,7 +107,7 @@ class chunked_reader { std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @brief Destructor explicitly-declared to avoid inlined in header. diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 33f430a9b1c..f4e63a1d84e 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -473,8 +473,8 @@ class chunked_orc_reader { std::size_t data_read_limit, size_type output_row_granularity, orc_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct the reader from input/output size limits along with other ORC reader options. 
@@ -495,7 +495,7 @@ class chunked_orc_reader { std::size_t data_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct the reader from output size limits along with other ORC reader options. @@ -513,7 +513,7 @@ class chunked_orc_reader { std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Destructor, destroying the internal reader instance. diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index d23caaec45d..4819e5e7b78 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -445,7 +445,7 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : reader{std::make_unique(output_size_limit, data_read_limit, output_row_granularity, @@ -460,7 +460,7 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, std::size_t data_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : reader{std::make_unique(output_size_limit, data_read_limit, make_datasources(options.get_source()), @@ -473,7 +473,7 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr) { } diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu index 006e70467b5..d4b0c3383af 100644 --- a/cpp/src/io/orc/reader.cu +++ b/cpp/src/io/orc/reader.cu @@ -25,7 +25,7 @@ reader::~reader() = default; reader::reader(std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : _impl{std::make_unique(std::move(sources), options, stream, mr)} { } @@ -37,7 +37,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : _impl{std::make_unique( output_size_limit, data_read_limit, std::move(sources), options, stream, mr)} { @@ -49,7 +49,7 @@ chunked_reader::chunked_reader(std::size_t output_size_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : _impl{std::make_unique(output_size_limit, data_read_limit, output_row_granularity, diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4a3ac4833f6..4d874cfc1b9 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -144,7 +144,7 @@ reader_impl::reader_impl(std::size_t output_size_limit, std::vector>&& sources, orc_reader_options const& options, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : reader_impl::reader_impl(output_size_limit, data_read_limit, DEFAULT_OUTPUT_ROW_GRANULARITY, @@ -161,7 +161,7 @@ reader_impl::reader_impl(std::size_t output_size_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : _stream(stream), _mr(mr), _config{options.get_timestamp_type(), diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index a872e5a091c..310102079fb 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -59,18 +59,18 @@ class reader_impl { /** * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, - * orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ explicit reader_impl(std::size_t output_size_limit, std::size_t data_read_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, - * size_type, orc_reader_options const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + * size_type, orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ explicit reader_impl(std::size_t output_size_limit, std::size_t data_read_limit, @@ -78,7 +78,7 @@ class reader_impl { std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); /** * @copydoc cudf::io::orc::detail::reader::read From 3b4d7f22903902bad7bbd43447d3e00b5c72be63 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 10:26:10 -0700 Subject: [PATCH 290/321] Rename `global_preprocess` into `preprocess_file` --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/reader_impl.hpp | 6 +++--- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4d874cfc1b9..289573b38ca 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -30,7 +30,7 @@ void reader_impl::prepare_data(read_mode mode) if (_selected_columns.num_levels() == 0) { return; } // This will be no-op if it was called before. - global_preprocess(mode); + preprocess_file(mode); if (!_chunk_read_data.more_table_chunk_to_output()) { if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) { diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 310102079fb..fd78a35792c 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -112,8 +112,8 @@ class reader_impl { void prepare_data(read_mode mode); /** - * @brief Perform a global preprocessing step that executes exactly once for the entire duration - * of the reader. + * @brief Perform a preprocessing step on the input data sources that executes exactly once + * for the entire duration of the reader. * * In this step, the metadata of all stripes in the data sources is parsed, and information about * data streams of the selected columns in all stripes are generated. 
If the reader has a data @@ -123,7 +123,7 @@ class reader_impl { * * @param mode Value indicating if the data sources are read all at once or chunk by chunk */ - void global_preprocess(read_mode mode); + void preprocess_file(read_mode mode); /** * @brief Load stripes from the input data sources into memory. diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 6c8f2c14adf..c05082a377a 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -230,7 +230,7 @@ range get_range(host_span input_ranges, range const& selected_range return {first_range.begin, last_range.end}; } -void reader_impl::global_preprocess(read_mode mode) +void reader_impl::preprocess_file(read_mode mode) { if (_file_itm_data.global_preprocessed) { return; } _file_itm_data.global_preprocessed = true; From 33f6d158fcb23ff26b13dd953f01691474ad14db Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 12:32:48 -0700 Subject: [PATCH 291/321] Optimize memory usage --- cpp/src/io/orc/reader_impl_chunking.cu | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index c05082a377a..6ad67701f0b 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -351,11 +351,15 @@ void reader_impl::preprocess_file(read_mode mode) // Collect all data streams' information: // + // Load all stripes if we are in READ_ALL mode or there is no read limit. + auto const load_all_stripes = + mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0; + // Accumulate data size for data streams in each stripe. // This will be used for CHUNKED_READ mode only. // If we are in READ_ALL mode, we do not need this since we just load all stripes. cudf::detail::hostdevice_vector total_stripe_sizes( - mode == read_mode::CHUNKED_READ ? num_total_stripes : std::size_t{0}, _stream); + load_all_stripes ? std::size_t{0} : num_total_stripes, _stream); for (std::size_t stripe_global_idx = 0; stripe_global_idx < num_total_stripes; ++stripe_global_idx) { @@ -416,9 +420,7 @@ void reader_impl::preprocess_file(read_mode mode) } } // end loop level - if (mode == read_mode::CHUNKED_READ) { - total_stripe_sizes[stripe_global_idx] = {1, this_stripe_size}; - } + if (!load_all_stripes) { total_stripe_sizes[stripe_global_idx] = {1, this_stripe_size}; } // Range of all stream reads in `read_info` corresponding to this stripe, in all levels. stripe_data_read_ranges[stripe_global_idx] = range{last_read_size, read_info.size()}; @@ -432,8 +434,7 @@ void reader_impl::preprocess_file(read_mode mode) // Load range is reset to start from the first position in `load_stripe_ranges`. _chunk_read_data.curr_load_stripe_range = 0; - // Load all stripes if there is no read limit or if we are in READ_ALL mode. 
- if (mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) { + if (load_all_stripes) { _chunk_read_data.load_stripe_ranges = {range{0UL, num_total_stripes}}; return; } From 5a253bd04bea1a1ea508c89acce50559521d3b23 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 13:29:05 -0700 Subject: [PATCH 292/321] Fix overflow handling Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 69 ++++++++++++++------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 6ad67701f0b..776e000f12f 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -544,6 +544,9 @@ void reader_impl::load_data(read_mode mode) // Decoding range is reset to start from the first position in `decode_stripe_ranges`. _chunk_read_data.curr_decode_stripe_range = 0; + auto constexpr column_size_limit = + static_cast(std::numeric_limits::max()); + // Decode all loaded stripes if there is no read limit, or if we are in READ_ALL mode. // In theory, we should just decode 'enough' stripes for output one table chunk, instead of // decoding all stripes like this, for better load-balancing and reduce memory usage. @@ -552,34 +555,11 @@ void reader_impl::load_data(read_mode mode) // In addition to read limit, we also need to check if the the total number of // rows in the loaded stripes exceeds column size limit. // If that is the case, we cannot decode all stripes at once. - num_loading_rows < static_cast(std::numeric_limits::max())) { + num_loading_rows < column_size_limit) { _chunk_read_data.decode_stripe_ranges = {load_stripe_range}; return; } - // For estimating the decompressed sizes of the loaded stripes. - // Only used in CHUNKED_READ mode. - cudf::detail::hostdevice_vector stripe_decomp_sizes( - mode == read_mode::CHUNKED_READ ? stripe_count : std::size_t{0}, _stream); - - // For mapping stripe to the number of rows in it. - // Only used in READ_ALL mode. - // This is to store exactly the same data as for `stripe_decomp_size` above but here we do not - // need to allocate device memory. - std::vector stripe_rows(mode == read_mode::READ_ALL ? stripe_count - : std::size_t{0}); - - // Fill up the `cumulative_size_and_row` array. - // Note: `hostdevice_vector::begin()` mirrors `std::vector::data()` using incorrect name. - auto const stripe_sizes_rows_ptr = - mode == read_mode::CHUNKED_READ ? stripe_decomp_sizes.begin() : stripe_rows.data(); - for (std::size_t idx = 0; idx < stripe_count; ++idx) { - auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; - auto const stripe_info = stripe.stripe_info; - stripe_sizes_rows_ptr[idx] = - cumulative_size_and_row{1UL /*count*/, 0UL /*size_bytes*/, stripe_info->numberOfRows}; - } - // This is the post-processing step after we've done with splitting `load_stripe_range` into // `decode_stripe_ranges`. auto const add_range_offset = [stripe_start](std::vector& new_ranges) { @@ -593,19 +573,29 @@ void reader_impl::load_data(read_mode mode) }; // Optimized code path when we do not have any read limit but the number of rows in the - // loaded stripes exceeds column size limit. + // loaded stripes exceeds cudf's column size limit. // Note that the values `max_uncompressed_size` for each stripe are not computed here. // Instead, they will be computed on the fly during decoding to avoid the overhead of // storing and retrieving from memory. 
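// The overflow handling below, restated as a stand-alone host-side sketch:
// when the loaded stripes hold more rows than a cudf column can carry,
// decode ranges are cut purely by cumulative row count, keeping at least one
// stripe per range (split_by_rows and range_sketch are hypothetical, not
// cudf API):
#include <cstddef>
#include <cstdint>
#include <vector>

struct range_sketch {
  std::size_t begin, end;
};

std::vector<range_sketch> split_by_rows(std::vector<std::uint64_t> const& stripe_rows,
                                        std::uint64_t row_limit)
{
  std::vector<range_sketch> out;
  std::size_t begin = 0;
  std::uint64_t acc = 0;
  for (std::size_t i = 0; i < stripe_rows.size(); ++i) {
    // Close the current range before it would exceed the limit; `i > begin`
    // guarantees progress when a single stripe alone exceeds the limit.
    if (acc + stripe_rows[i] > row_limit && i > begin) {
      out.push_back({begin, i});
      begin = i;
      acc   = 0;
    }
    acc += stripe_rows[i];
  }
  out.push_back({begin, stripe_rows.size()});
  return out;
}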
if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) && - num_loading_rows >= static_cast(std::numeric_limits::max())) { - // Here we will split stripe ranges based on stripes' number of rows, not their data size. - // Thus, we use a maximum possible value for data size limit. - // The function `find_splits` will automatically handle row count limit. - _chunk_read_data.decode_stripe_ranges = find_splits( - cudf::host_span(stripe_sizes_rows_ptr, stripe_count), - stripe_count, - std::numeric_limits::max()); + num_loading_rows >= column_size_limit) { + std::vector cumulative_stripe_rows(stripe_count); + std::size_t rows{0}; + + for (std::size_t idx = 0; idx < stripe_count; ++idx) { + auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; + auto const stripe_info = stripe.stripe_info; + rows += stripe_info->numberOfRows; + + // Here we will split stripe ranges based only on stripes' number of rows, not data size. + // Thus, we override the cumulative `size_bytes` using the prefix sum of rows in stripe and + // will use the column size limit (`std::numeric_limits::max()`) as split limit. + cumulative_stripe_rows[idx] = + cumulative_size_and_row{idx + 1UL /*count*/, rows /*size_bytes*/, rows}; + } + + _chunk_read_data.decode_stripe_ranges = + find_splits(cumulative_stripe_rows, stripe_count, column_size_limit); add_range_offset(_chunk_read_data.decode_stripe_ranges); return; } @@ -615,6 +605,19 @@ void reader_impl::load_data(read_mode mode) // memory: // + // For estimating the decompressed sizes of the loaded stripes. + cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, + _stream); + + // Fill up the `cumulative_size_and_row` array with initial values. + // Note: `hostdevice_vector::begin()` mirrors `std::vector::data()` using incorrect name. + for (std::size_t idx = 0; idx < stripe_count; ++idx) { + auto const& stripe = _file_itm_data.selected_stripes[idx + stripe_start]; + auto const stripe_info = stripe.stripe_info; + stripe_decomp_sizes[idx] = + cumulative_size_and_row{1UL /*count*/, 0UL /*size_bytes*/, stripe_info->numberOfRows}; + } + auto& compinfo_map = _file_itm_data.compinfo_map; compinfo_map.clear(); // clear cache of the last load From 7a9c43671c29d6683287c90028afff6af42a1cdd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 13:53:02 -0700 Subject: [PATCH 293/321] Remove `reader.cu` Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 1 - cpp/src/io/orc/reader.cu | 69 ----------------------------------- cpp/src/io/orc/reader_impl.cu | 46 +++++++++++++++++++++++ 3 files changed, 46 insertions(+), 70 deletions(-) delete mode 100644 cpp/src/io/orc/reader.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1a5827ef144..648cbf0a428 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -393,7 +393,6 @@ add_library( src/io/orc/aggregate_orc_metadata.cpp src/io/orc/dict_enc.cu src/io/orc/orc.cpp - src/io/orc/reader.cu src/io/orc/reader_impl.cu src/io/orc/reader_impl_chunking.cu src/io/orc/reader_impl_decode.cu diff --git a/cpp/src/io/orc/reader.cu b/cpp/src/io/orc/reader.cu deleted file mode 100644 index d4b0c3383af..00000000000 --- a/cpp/src/io/orc/reader.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "io/orc/reader_impl.hpp" -#include "io/orc/reader_impl_helpers.hpp" - -namespace cudf::io::orc::detail { - -// Destructor are defined within this translation unit. -reader::~reader() = default; - -reader::reader(std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : _impl{std::make_unique(std::move(sources), options, stream, mr)} -{ -} - -table_with_metadata reader::read() { return _impl->read(); } - -chunked_reader::chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : _impl{std::make_unique( - output_size_limit, data_read_limit, std::move(sources), options, stream, mr)} -{ -} - -chunked_reader::chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, - size_type output_row_granularity, - std::vector>&& sources, - orc_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : _impl{std::make_unique(output_size_limit, - data_read_limit, - output_row_granularity, - std::move(sources), - options, - stream, - mr)} -{ -} - -chunked_reader::~chunked_reader() = default; - -bool chunked_reader::has_next() const { return _impl->has_next(); } - -table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); } - -} // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 289573b38ca..e9c34896425 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -201,4 +201,50 @@ table_with_metadata reader_impl::read_chunk() return make_output_chunk(); } +chunked_reader::chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : _impl{std::make_unique( + output_size_limit, data_read_limit, std::move(sources), options, stream, mr)} +{ +} + +chunked_reader::chunked_reader(std::size_t output_size_limit, + std::size_t data_read_limit, + size_type output_row_granularity, + std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : _impl{std::make_unique(output_size_limit, + data_read_limit, + output_row_granularity, + std::move(sources), + options, + stream, + mr)} +{ +} + +chunked_reader::~chunked_reader() = default; + +bool chunked_reader::has_next() const { return _impl->has_next(); } + +table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); } + +reader::reader(std::vector>&& sources, + orc_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : _impl{std::make_unique(std::move(sources), options, stream, mr)} +{ +} + +reader::~reader() = default; + +table_with_metadata reader::read() { return _impl->read(); } + } // namespace cudf::io::orc::detail From 
219372213a9f7d96cfc8094bebf240aa4e6edbe0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 14:06:17 -0700 Subject: [PATCH 294/321] Add a test Signed-off-by: Nghia Truong --- cpp/tests/io/orc_chunked_reader_test.cu | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 59727c6d5fc..1c1b53ea17f 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -174,6 +174,18 @@ TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } +TEST_F(OrcChunkedReaderTest, TestChunkedReadInvalidParameter) +{ + std::vector> input_columns; + input_columns.emplace_back(int32s_col{}.release()); + input_columns.emplace_back(int64s_col{}.release()); + + auto const [expected, filepath] = write_file(input_columns, "chunked_read_invalid"); + EXPECT_THROW( + chunked_read(filepath, output_limit{1'000}, output_row_granularity{-1} /*invalid value*/), + cudf::logic_error); +} + TEST_F(OrcChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; From 4d3ddd18c4582a921ca632abdc5b4b81809eb160 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 14:40:56 -0700 Subject: [PATCH 295/321] Rewrite benchmark Signed-off-by: Nghia Truong --- cpp/benchmarks/io/orc/orc_reader_input.cpp | 45 ++++++++++++---------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 58b43367382..1f73374ac20 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -30,6 +30,7 @@ namespace { // run on most GPUs, but large enough to allow highest throughput constexpr int64_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; +constexpr std::size_t Mbytes{1024 * 1024}; template void orc_read_common(cudf::size_type num_rows_to_read, @@ -46,10 +47,12 @@ void orc_read_common(cudf::size_type num_rows_to_read, state.exec( nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch&, auto& timer) { try_drop_l3_cache(); - auto const output_limit = static_cast(state.get_int64("output_limit")); - auto const read_limit = static_cast(state.get_int64("read_limit")); + auto const output_limit_MB = + static_cast(state.get_int64("chunk_read_limit_MB")); + auto const read_limit_MB = static_cast(state.get_int64("pass_read_limit_MB")); - auto reader = cudf::io::chunked_orc_reader(output_limit, read_limit, read_opts); + auto reader = + cudf::io::chunked_orc_reader(output_limit_MB * Mbytes, read_limit_MB * Mbytes, read_opts); cudf::size_type num_rows{0}; timer.start(); @@ -120,15 +123,21 @@ void orc_read_io_compression(nvbench::state& state) static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); + auto const [cardinality, run_length] = [&]() -> std::pair { + if constexpr (chunked_read) { + return {0, 4}; + } else { + return {static_cast(state.get_int64("cardinality")), + static_cast(state.get_int64("run_length"))}; + } + }(); cuio_source_sink_pair source_sink(IOType); auto const num_rows_written = [&]() { auto const tbl = create_random_table( cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, - data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + 
data_profile_builder{}.cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); cudf::io::orc_writer_options opts = @@ -149,12 +158,12 @@ void BM_orc_read_io_compression( return orc_read_io_compression(state); } -template -void BM_orc_chunked_read_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_orc_chunked_read_io_compression(nvbench::state& state, + nvbench::type_list>) { - return orc_read_io_compression(state); + // Only run benchmark using HOST_BUFFER IO. + return orc_read_io_compression(state); } using d_type_list = nvbench::enum_type_list Date: Tue, 23 Apr 2024 14:52:37 -0700 Subject: [PATCH 296/321] Rename parameters Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 8 +++--- cpp/include/cudf/io/orc.hpp | 36 ++++++++++++------------- cpp/src/io/functions.cpp | 20 +++++++------- cpp/src/io/orc/reader_impl.cu | 28 +++++++++---------- cpp/src/io/orc/reader_impl.hpp | 10 +++---- cpp/src/io/orc/reader_impl_chunking.cu | 10 +++---- cpp/src/io/orc/reader_impl_chunking.hpp | 12 ++++----- cpp/src/io/orc/reader_impl_decode.cu | 4 +-- 8 files changed, 64 insertions(+), 64 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 4d610891858..597ddd9cf0a 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -89,8 +89,8 @@ class chunked_reader { * * @param sources Input `datasource` objects to read the dataset from */ - explicit chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, + explicit chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, @@ -102,8 +102,8 @@ class chunked_reader { * * @param sources Input `datasource` objects to read the dataset from */ - explicit chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, + explicit chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index f4e63a1d84e..8140f8897b7 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -407,7 +407,7 @@ table_with_metadata read_orc( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** - * @brief The chunked orc reader class to read ORC file iteratively into a series of + * @brief The chunked orc reader class to read an ORC file iteratively into a series of * tables, chunk by chunk. * * This class is designed to address the reading issue when reading very large ORC files such @@ -437,28 +437,28 @@ class chunked_orc_reader { * * ``` * - * If `output_size_limit == 0` (i.e., no output limit) and `data_read_limit == 0` (no temporary + * If `chunk_read_limit == 0` (i.e., no output limit) and `pass_read_limit == 0` (no temporary * memory size limit), a call to `read_chunk()` will read the whole data source and return a table * containing all rows. * - * The `output_size_limit` parameter controls the size of the output table to be returned per + * The `chunk_read_limit` parameter controls the size of the output table to be returned per * `read_chunk()` call. If the user specifies a 100 MB limit, the reader will attempt to return * tables that have a total bytes size (over all columns) of 100 MB or less. 
* This is a soft limit and the code will not fail if it cannot satisfy the limit.
  *
-   * The `data_read_limit` parameter controls how much temporary memory is used in the entire
+   * The `pass_read_limit` parameter controls how much temporary memory is used in the entire
   * process of loading, decompressing and decoding of data. Again, this is also a soft limit and
   * the reader will try to make the best effort.
   *
   * Finally, the parameter `output_row_granularity` controls the changes in row number of the
-   * output chunk. For each call to `read_chunk()`, with respect to the given `data_read_limit`, a
+   * output chunk. For each call to `read_chunk()`, with respect to the given `pass_read_limit`, a
   * subset of stripes may be loaded, decompressed and decoded into an intermediate table. The
   * reader will then subdivide that table into smaller tables for final output using
   * `output_row_granularity` as the subdivision step.
   *
-   * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
+   * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call,
   * or `0` if there is no limit
-   * @param data_read_limit Limit on temporary memory usage for reading the data sources,
+   * @param pass_read_limit Limit on temporary memory usage for reading the data sources,
   * or `0` if there is no limit
   * @param output_row_granularity The granularity parameter used for subdividing the decoded
   * table for final output
@@ -469,8 +469,8 @@ class chunked_orc_reader {
   * @throw cudf::logic_error if `output_row_granularity` is non-positive
   */
  explicit chunked_orc_reader(
-    std::size_t output_size_limit,
-    std::size_t data_read_limit,
+    std::size_t chunk_read_limit,
+    std::size_t pass_read_limit,
    size_type output_row_granularity,
    orc_reader_options const& options,
    rmm::cuda_stream_view stream = cudf::get_default_stream(),
@@ -482,37 +482,37 @@ class chunked_orc_reader {
   * This constructor implicitly call the other constructor with `output_row_granularity` set to
   * `DEFAULT_OUTPUT_ROW_GRANULARITY` rows.
   *
-   * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call,
+   * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call,
   * or `0` if there is no limit
-   * @param data_read_limit Limit on temporary memory usage for reading the data sources,
+   * @param pass_read_limit Limit on temporary memory usage for reading the data sources,
   * or `0` if there is no limit
   * @param options Settings for controlling reading behaviors
   * @param stream CUDA stream used for device memory operations and kernel launches
   * @param mr Device memory resource to use for device memory allocation
   */
  explicit chunked_orc_reader(
-    std::size_t output_size_limit,
-    std::size_t data_read_limit,
+    std::size_t chunk_read_limit,
+    std::size_t pass_read_limit,
    orc_reader_options const& options,
-    rmm::cuda_stream_view stream = cudf::get_default_stream(),
+    rmm::cuda_stream_view stream      = cudf::get_default_stream(),
    rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
 
  /**
   * @brief Construct the reader from output size limits along with other ORC reader options.
   *
-  * This constructor implicitly call the other constructor with `data_read_limit` set to `0` and
+  * This constructor implicitly calls the other constructor with `pass_read_limit` set to `0` and
   * `output_row_granularity` set to `DEFAULT_OUTPUT_ROW_GRANULARITY` rows.
* - * @param output_size_limit Limit on total number of bytes to be returned per `read_chunk()` call, + * @param chunk_read_limit Limit on total number of bytes to be returned per `read_chunk()` call, * or `0` if there is no limit * @param options Settings for controlling reading behaviors * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit chunked_orc_reader( - std::size_t output_size_limit, + std::size_t chunk_read_limit, orc_reader_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 4819e5e7b78..74b5a654382 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -440,14 +440,14 @@ void write_orc(orc_writer_options const& options, rmm::cuda_stream_view stream) writer->write(options.get_table()); } -chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, - std::size_t data_read_limit, +chunked_orc_reader::chunked_orc_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : reader{std::make_unique(output_size_limit, - data_read_limit, + : reader{std::make_unique(chunk_read_limit, + pass_read_limit, output_row_granularity, make_datasources(options.get_source()), options, @@ -456,13 +456,13 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, { } -chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, - std::size_t data_read_limit, +chunked_orc_reader::chunked_orc_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : reader{std::make_unique(output_size_limit, - data_read_limit, + : reader{std::make_unique(chunk_read_limit, + pass_read_limit, make_datasources(options.get_source()), options, stream, @@ -470,11 +470,11 @@ chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, { } -chunked_orc_reader::chunked_orc_reader(std::size_t output_size_limit, +chunked_orc_reader::chunked_orc_reader(std::size_t chunk_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : chunked_orc_reader(output_size_limit, 0UL, options, stream, mr) + : chunked_orc_reader(chunk_read_limit, 0UL, options, stream, mr) { } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index e9c34896425..63cc0226ea3 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -139,14 +139,14 @@ reader_impl::reader_impl(std::vector>&& sources, { } -reader_impl::reader_impl(std::size_t output_size_limit, - std::size_t data_read_limit, +reader_impl::reader_impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : reader_impl::reader_impl(output_size_limit, - data_read_limit, + : reader_impl::reader_impl(chunk_read_limit, + pass_read_limit, DEFAULT_OUTPUT_ROW_GRANULARITY, std::move(sources), options, @@ -155,8 +155,8 @@ reader_impl::reader_impl(std::size_t output_size_limit, { } 
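// A usage sketch (not part of this patch) driving the limits renamed here through the public
// API; `read_orc_in_chunks` and the `process()` consumer are illustrative stand-ins, and the
// file name is hypothetical.
namespace {
void read_orc_in_chunks(void (*process)(cudf::table_view const&))
{
  auto const opts =
    cudf::io::orc_reader_options::builder(cudf::io::source_info{"input.orc"}).build();
  // ~512 MB cap per returned chunk, ~2 GB cap on temporary memory per pass; both are soft.
  auto reader = cudf::io::chunked_orc_reader(
    std::size_t{512} << 20 /*chunk_read_limit*/, std::size_t{2} << 30 /*pass_read_limit*/, opts);
  while (reader.has_next()) {
    auto chunk = reader.read_chunk();  // a cudf::io::table_with_metadata
    process(chunk.tbl->view());        // hand each output chunk to the consumer
  }
}
}  // namespace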
-reader_impl::reader_impl(std::size_t output_size_limit, - std::size_t data_read_limit, +reader_impl::reader_impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, @@ -175,7 +175,7 @@ reader_impl::reader_impl(std::size_t output_size_limit, _sources(std::move(sources)), _metadata{_sources, stream}, _selected_columns{_metadata.select_columns(options.get_columns())}, - _chunk_read_data{output_size_limit, data_read_limit, output_row_granularity} + _chunk_read_data{chunk_read_limit, pass_read_limit, output_row_granularity} { // Selected columns at different levels of nesting are stored in different elements // of `selected_columns`; thus, size == 1 means no nested columns. @@ -201,26 +201,26 @@ table_with_metadata reader_impl::read_chunk() return make_output_chunk(); } -chunked_reader::chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, +chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) : _impl{std::make_unique( - output_size_limit, data_read_limit, std::move(sources), options, stream, mr)} + chunk_read_limit, pass_read_limit, std::move(sources), options, stream, mr)} { } -chunked_reader::chunked_reader(std::size_t output_size_limit, - std::size_t data_read_limit, +chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : _impl{std::make_unique(output_size_limit, - data_read_limit, + : _impl{std::make_unique(chunk_read_limit, + pass_read_limit, output_row_granularity, std::move(sources), options, diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index fd78a35792c..b9ec4a74a31 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -44,7 +44,7 @@ class reader_impl { /** * @brief Constructor from a dataset source with reader options. * - * This constructor will call the other constructor with `output_size_limit` and `data_read_limit` + * This constructor will call the other constructor with `chunk_read_limit` and `pass_read_limit` * set to `0` and `output_row_granularity` set to `DEFAULT_OUTPUT_ROW_GRANULARITY`. 
* * @param sources Dataset sources @@ -61,8 +61,8 @@ class reader_impl { * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, * orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ - explicit reader_impl(std::size_t output_size_limit, - std::size_t data_read_limit, + explicit reader_impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, orc_reader_options const& options, rmm::cuda_stream_view stream, @@ -72,8 +72,8 @@ class reader_impl { * @copydoc cudf::io::orc::detail::chunked_reader::chunked_reader(std::size_t, std::size_t, * size_type, orc_reader_options const&, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ - explicit reader_impl(std::size_t output_size_limit, - std::size_t data_read_limit, + explicit reader_impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, size_type output_row_granularity, std::vector>&& sources, orc_reader_options const& options, diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 776e000f12f..7e96b251868 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -353,7 +353,7 @@ void reader_impl::preprocess_file(read_mode mode) // Load all stripes if we are in READ_ALL mode or there is no read limit. auto const load_all_stripes = - mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0; + mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0; // Accumulate data size for data streams in each stripe. // This will be used for CHUNKED_READ mode only. @@ -449,7 +449,7 @@ void reader_impl::preprocess_file(read_mode mode) total_stripe_sizes.device_to_host_sync(_stream); auto const load_limit = [&] { - auto const tmp = static_cast(_chunk_read_data.data_read_limit * + auto const tmp = static_cast(_chunk_read_data.pass_read_limit * chunk_read_data::load_limit_ratio); // Make sure not to pass 0 byte limit (due to round-off) to `find_splits`. return tmp > 0UL ? tmp : 1UL; @@ -551,7 +551,7 @@ void reader_impl::load_data(read_mode mode) // In theory, we should just decode 'enough' stripes for output one table chunk, instead of // decoding all stripes like this, for better load-balancing and reduce memory usage. // However, we do not have any good way to know how many stripes are 'enough'. - if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) && + if ((mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0) && // In addition to read limit, we also need to check if the the total number of // rows in the loaded stripes exceeds column size limit. // If that is the case, we cannot decode all stripes at once. @@ -577,7 +577,7 @@ void reader_impl::load_data(read_mode mode) // Note that the values `max_uncompressed_size` for each stripe are not computed here. // Instead, they will be computed on the fly during decoding to avoid the overhead of // storing and retrieving from memory. 
- if ((mode == read_mode::READ_ALL || _chunk_read_data.data_read_limit == 0) && + if ((mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0) && num_loading_rows >= column_size_limit) { std::vector cumulative_stripe_rows(stripe_count); std::size_t rows{0}; @@ -703,7 +703,7 @@ void reader_impl::load_data(read_mode mode) stripe_decomp_sizes.device_to_host_sync(_stream); auto const decode_limit = [&] { - auto const tmp = static_cast(_chunk_read_data.data_read_limit * + auto const tmp = static_cast(_chunk_read_data.pass_read_limit * chunk_read_data::decode_limit_ratio); // Make sure not to pass 0 byte limit to `find_splits`. return tmp > 0UL ? tmp : 1UL; diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 58b67760b66..7e58188a0a7 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -159,8 +159,8 @@ struct chunk_read_data { explicit chunk_read_data(std::size_t output_size_limit_, std::size_t data_read_limit_, size_type output_row_granularity_) - : output_size_limit{output_size_limit_}, - data_read_limit{data_read_limit_}, + : chunk_read_limit{output_size_limit_}, + pass_read_limit{data_read_limit_}, output_row_granularity{output_row_granularity_} { CUDF_EXPECTS(output_row_granularity > 0, @@ -168,14 +168,14 @@ struct chunk_read_data { } std::size_t const - output_size_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit - std::size_t const data_read_limit; // approximate maximum size (in bytes) used for store + chunk_read_limit; // maximum size (in bytes) of an output chunk, or 0 for no limit + std::size_t const pass_read_limit; // approximate maximum size (in bytes) used for store // intermediate data, or 0 for no limit size_type const output_row_granularity; // Memory limits for loading data and decoding are computed as - // `load/decode_limit_ratio * data_read_limit`. - // This is to maintain the total memory usage to be **around** the given `data_read_limit`. + // `load/decode_limit_ratio * pass_read_limit`. + // This is to maintain the total memory usage to be **around** the given `pass_read_limit`. // Note that sum of these limits may not be `1.0`, and their values are set empirically. static double constexpr load_limit_ratio{0.25}; static double constexpr decode_limit_ratio{0.6}; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 91dbff9689e..2ac3f0dfb3b 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -1066,12 +1066,12 @@ void reader_impl::decompress_and_decode(read_mode mode) // Split the decoded table into ranges that be output into chunks having size within the given // output size limit. _chunk_read_data.output_table_ranges = - _chunk_read_data.output_size_limit == 0 + _chunk_read_data.chunk_read_limit == 0 ? 
std::vector{range{ 0, static_cast(_chunk_read_data.decoded_table->num_rows())}} : find_table_splits(_chunk_read_data.decoded_table->view(), _chunk_read_data.output_row_granularity, - _chunk_read_data.output_size_limit, + _chunk_read_data.chunk_read_limit, _stream); } From b5343dc1a82abfe587e2d1ed6f67d0ee8dd07f9f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 14:55:35 -0700 Subject: [PATCH 297/321] Rename parameter Signed-off-by: Nghia Truong --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 7 ++++--- cpp/src/io/orc/aggregate_orc_metadata.hpp | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index da49fc84d06..ac0dd10856c 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -156,15 +156,16 @@ std::tuple> aggregate_orc_metadata::select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, - std::optional const& num_rows, + std::optional const& num_read_rows, rmm::cuda_stream_view stream) { - CUDF_EXPECTS((skip_rows == 0 and not num_rows.has_value()) or user_specified_stripes.empty(), + CUDF_EXPECTS((skip_rows == 0 and not num_read_rows.has_value()) or user_specified_stripes.empty(), "Can't use both the row selection and the stripe selection"); auto [rows_to_skip, rows_to_read] = [&]() { if (not user_specified_stripes.empty()) { return std::pair{0, 0}; } - return cudf::io::detail::skip_rows_num_rows_from_options(skip_rows, num_rows, get_num_rows()); + return cudf::io::detail::skip_rows_num_rows_from_options( + skip_rows, num_read_rows, get_num_rows()); }(); struct stripe_source_mapping { diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 94f681fff0c..5da5af58b9b 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -118,7 +118,7 @@ class aggregate_orc_metadata { * * @param user_specified_stripes The specified stripe indices to read * @param skip_rows Number of rows to skip from reading - * @param num_rows Number of rows to read + * @param num_read_rows Number of rows to read * @param stream CUDA stream used for device memory operations and kernel launches * @return A tuple of the corrected skip_rows and num_rows values along with a vector of * stripes' metadata such as footer, data information, and source index @@ -126,7 +126,7 @@ class aggregate_orc_metadata { [[nodiscard]] std::tuple> select_stripes( std::vector> const& user_specified_stripes, int64_t skip_rows, - std::optional const& num_rows, + std::optional const& num_read_rows, rmm::cuda_stream_view stream); /** From bdc92a0f5bed1e1683e5e4dfea5b22a9ea832747 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 15:47:06 -0700 Subject: [PATCH 298/321] Rename functions Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 14 +++++++------- cpp/src/io/orc/reader_impl.hpp | 14 +++++++------- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- cpp/src/io/orc/reader_impl_chunking.hpp | 8 ++++---- cpp/src/io/orc/reader_impl_decode.cu | 4 ++-- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 63cc0226ea3..b877caff09a 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -32,19 +32,19 @@ void reader_impl::prepare_data(read_mode mode) // This will be no-op if it was called before. 
preprocess_file(mode);
 
-  if (!_chunk_read_data.more_table_chunk_to_output()) {
-    if (!_chunk_read_data.more_stripe_to_decode() && _chunk_read_data.more_stripe_to_load()) {
+  if (!_chunk_read_data.more_table_chunks_to_output()) {
+    if (!_chunk_read_data.more_stripes_to_decode() && _chunk_read_data.more_stripes_to_load()) {
       // Only load stripe data if:
       // - There is more stripe to load, and
       // - All loaded stripes were decoded, and
       // - All the decoded results were output.
-      load_data(mode);
+      load_next_stripe_data(mode);
     }
-    if (_chunk_read_data.more_stripe_to_decode()) {
+    if (_chunk_read_data.more_stripes_to_decode()) {
       // Only decompress/decode the loaded stripes if:
       // - There are loaded stripes that were not decoded yet, and
       // - All the decoded results were output.
-      decompress_and_decode(mode);
+      decompress_and_decode_stripes(mode);
     }
   }
 }
 
@@ -55,7 +55,7 @@ table_with_metadata reader_impl::make_output_chunk()
   if (_selected_columns.num_levels() == 0) { return {std::make_unique<table>
(), table_metadata{}}; }
 
   // If no rows or stripes to read, return empty columns.
-  if (!_chunk_read_data.more_table_chunk_to_output()) {
+  if (!_chunk_read_data.more_table_chunks_to_output()) {
     std::vector<std::unique_ptr<column>> out_columns;
     auto out_metadata = get_meta_with_user_data();
     std::transform(_selected_columns.levels[0].begin(),
@@ -94,7 +94,7 @@ table_with_metadata reader_impl::make_output_chunk()
   auto output = std::make_unique<table>
(out_tview, _stream, _mr); // If this is the last slice, we also delete the decoded table to free up memory. - if (!_chunk_read_data.more_table_chunk_to_output()) { + if (!_chunk_read_data.more_table_chunks_to_output()) { _chunk_read_data.decoded_table.reset(nullptr); } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b9ec4a74a31..a07ef5d917d 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -118,8 +118,8 @@ class reader_impl { * In this step, the metadata of all stripes in the data sources is parsed, and information about * data streams of the selected columns in all stripes are generated. If the reader has a data * read limit, sizes of these streams are used to split the list of all stripes into multiple - * subsets, each of which will be read into memory in the `load_data()` step. These subsets are - * computed such that memory usage will be kept to be around a fixed size limit. + * subsets, each of which will be read into memory in the `load_next_stripe_data()` step. These + * subsets are computed such that memory usage will be kept to be around a fixed size limit. * * @param mode Value indicating if the data sources are read all at once or chunk by chunk */ @@ -132,23 +132,23 @@ class reader_impl { * their total data size does not exceed a fixed size limit. Then, the data is probed to * estimate its uncompressed sizes, which are in turn used to split that stripe subset into * smaller subsets, each of which to be decompressed and decoded in the next step - * `decompress_and_decode()`. This is to ensure that loading data from data sources together with - * decompression and decoding will be capped around the given data read limit. + * `decompress_and_decode_stripes()`. This is to ensure that loading data from data sources + * together with decompression and decoding will be capped around the given data read limit. * * @param mode Value indicating if the data sources are read all at once or chunk by chunk */ - void load_data(read_mode mode); + void load_next_stripe_data(read_mode mode); /** * @brief Decompress and decode stripe data in the internal buffers, and store the result into * an intermediate table. * * This function expects that the other preprocessing steps (`global preprocess()` and - * `load_data()`) have already been done. + * `load_next_stripe_data()`) have already been done. * * @param mode Value indicating if the data sources are read all at once or chunk by chunk */ - void decompress_and_decode(read_mode mode); + void decompress_and_decode_stripes(read_mode mode); /** * @brief Create the output table from the intermediate table and return it along with metadata. diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 7e96b251868..3afb626de72 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -459,7 +459,7 @@ void reader_impl::preprocess_file(read_mode mode) find_splits(total_stripe_sizes, num_total_stripes, load_limit); } -void reader_impl::load_data(read_mode mode) +void reader_impl::load_next_stripe_data(read_mode mode) { if (_file_itm_data.has_no_data()) { return; } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 7e58188a0a7..19db24b70f5 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -184,12 +184,12 @@ struct chunk_read_data { // limit. 
std::vector load_stripe_ranges; std::size_t curr_load_stripe_range{0}; - bool more_stripe_to_load() const { return curr_load_stripe_range < load_stripe_ranges.size(); } + bool more_stripes_to_load() const { return curr_load_stripe_range < load_stripe_ranges.size(); } // Chunks of stripes such that their decompression size is within a size limit. std::vector decode_stripe_ranges; std::size_t curr_decode_stripe_range{0}; - bool more_stripe_to_decode() const + bool more_stripes_to_decode() const { return curr_decode_stripe_range < decode_stripe_ranges.size(); } @@ -198,7 +198,7 @@ struct chunk_read_data { std::vector output_table_ranges; std::size_t curr_output_table_range{0}; std::unique_ptr decoded_table; - bool more_table_chunk_to_output() const + bool more_table_chunks_to_output() const { return curr_output_table_range < output_table_ranges.size(); } @@ -206,7 +206,7 @@ struct chunk_read_data { bool has_next() const { // Only has more chunk to output if: - return more_stripe_to_load() || more_stripe_to_decode() || more_table_chunk_to_output(); + return more_stripes_to_load() || more_stripes_to_decode() || more_table_chunks_to_output(); } }; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 2ac3f0dfb3b..5ab8516276d 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -710,7 +710,7 @@ std::vector find_table_splits(table_view const& input, } // namespace -void reader_impl::decompress_and_decode(read_mode mode) +void reader_impl::decompress_and_decode_stripes(read_mode mode) { if (_file_itm_data.has_no_data()) { return; } @@ -746,7 +746,7 @@ void reader_impl::decompress_and_decode(read_mode mode) _file_itm_data.rows_to_skip = 0; _file_itm_data.rows_to_read -= rows_to_decode; - // Technically, overflow here should never happen because the `load_data()` step + // Technically, overflow here should never happen because the `load_next_stripe_data()` step // already handled it by splitting the loaded stripe range into multiple decode ranges. CUDF_EXPECTS(rows_to_decode <= static_cast(std::numeric_limits::max()), "Number or rows to decode exceeds the column size limit.", From 252d546a9058c3af8072cc360687a77e5d479344 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 16:08:08 -0700 Subject: [PATCH 299/321] Fix format Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index b877caff09a..048794c3c05 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -51,7 +51,7 @@ void reader_impl::prepare_data(read_mode mode) table_with_metadata reader_impl::make_output_chunk() { - // There is no columns in the table. + // There are no columns in the table. if (_selected_columns.num_levels() == 0) { return {std::make_unique
(), table_metadata{}}; } // If no rows or stripes to read, return empty columns. @@ -119,7 +119,9 @@ table_metadata reader_impl::get_meta_with_user_data() std::transform(meta.ff.metadata.cbegin(), meta.ff.metadata.cend(), std::inserter(kv_map, kv_map.end()), - [](auto const& kv) { return std::pair{kv.name, kv.value}; }); + [](auto const& kv) { + return std::pair{kv.name, kv.value}; + }); return kv_map; }); out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), From 8ebbb2cb2791a4cc3177d12915b6e18850c9ca11 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 19:59:44 -0700 Subject: [PATCH 300/321] Change comments Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 3afb626de72..0cb73a5f329 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -198,7 +198,7 @@ std::vector find_splits(host_span cumulative_sizes, // If the last range has size smaller than `merge_threshold` the size of the second last one, // merge it with the second last one. - // This is to prevent having too small trailing range. + // This is to prevent having the last range too small. if (splits.size() > 1) { double constexpr merge_threshold = 0.15; if (auto const last = splits.back(), second_last = splits[splits.size() - 2]; @@ -427,8 +427,8 @@ void reader_impl::preprocess_file(read_mode mode) } // - // Split range of all stripes into subranges that can be loaded separately without blowing up - // memory: + // Split range of all stripes into subranges that can be loaded separately while maintaining + // the memory usage under the given pass limit: // // Load range is reset to start from the first position in `load_stripe_ranges`. @@ -588,7 +588,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) rows += stripe_info->numberOfRows; // Here we will split stripe ranges based only on stripes' number of rows, not data size. - // Thus, we override the cumulative `size_bytes` using the prefix sum of rows in stripe and + // Thus, we override the cumulative `size_bytes` using the prefix sum of rows in stripes and // will use the column size limit (`std::numeric_limits::max()`) as split limit. cumulative_stripe_rows[idx] = cumulative_size_and_row{idx + 1UL /*count*/, rows /*size_bytes*/, rows}; From a793eb73134f375a4178d8585913a8cc499e2462 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Apr 2024 20:11:14 -0700 Subject: [PATCH 301/321] Change comments and rename variable Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 2 +- cpp/src/io/orc/reader_impl_chunking.hpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 0cb73a5f329..39026783135 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -704,7 +704,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) auto const decode_limit = [&] { auto const tmp = static_cast(_chunk_read_data.pass_read_limit * - chunk_read_data::decode_limit_ratio); + chunk_read_data::decompress_and_decode_limit_ratio); // Make sure not to pass 0 byte limit to `find_splits`. return tmp > 0UL ? 
tmp : 1UL; }(); diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 19db24b70f5..d7ddf9d50f9 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -57,7 +57,7 @@ struct stripe_level_comp_info { }; /** - * @brief Struct that store source information of an ORC streams. + * @brief Struct that stores source information of an ORC streams. */ struct stream_source_info { std::size_t stripe_idx; // global stripe id throughout all data sources @@ -91,7 +91,7 @@ using stream_source_map = std::unordered_map; /** - * @brief Struct that store information of an ORC stream. + * @brief Struct that stores information of an ORC stream. */ struct orc_stream_info { // Data info: @@ -114,8 +114,8 @@ struct file_intermediate_data { // Return true if no rows or stripes to read. bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); } - // For each stripe, we perform a number of read for its streams. - // Those reads are identified by a chunk of consecutive read info, stored in data_read_info. + // For each stripe, we perform a number of reads for its streams. + // Those reads are identified by a chunk of consecutive read info stored in `data_read_info`. std::vector stripe_data_read_ranges; // Identify what data to read from source. @@ -174,19 +174,19 @@ struct chunk_read_data { size_type const output_row_granularity; // Memory limits for loading data and decoding are computed as - // `load/decode_limit_ratio * pass_read_limit`. + // `*_limit_ratio * pass_read_limit`. // This is to maintain the total memory usage to be **around** the given `pass_read_limit`. // Note that sum of these limits may not be `1.0`, and their values are set empirically. static double constexpr load_limit_ratio{0.25}; - static double constexpr decode_limit_ratio{0.6}; + static double constexpr decompress_and_decode_limit_ratio{0.6}; - // Chunks of stripes that can be load into memory such that their data size is within a size - // limit. + // Chunks of stripes that can be loaded into memory such that their data size is within the user + // specified limit. std::vector load_stripe_ranges; std::size_t curr_load_stripe_range{0}; bool more_stripes_to_load() const { return curr_load_stripe_range < load_stripe_ranges.size(); } - // Chunks of stripes such that their decompression size is within a size limit. + // Chunks of stripes such that their decompression size is within the user specified size limit. std::vector decode_stripe_ranges; std::size_t curr_decode_stripe_range{0}; bool more_stripes_to_decode() const From 1a7c3a959a5e3f44fa71132cde011231d91cfc21 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 24 Apr 2024 09:49:40 -0700 Subject: [PATCH 302/321] Change comments Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 5 +- cpp/src/io/orc/reader_impl.hpp | 2 +- cpp/src/io/orc/reader_impl_chunking.cu | 63 ++++++++++++++++---------- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 048794c3c05..ae077886015 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -24,6 +24,7 @@ namespace cudf::io::orc::detail { +// This is just the proxy to call all other data preprocessing functions. void reader_impl::prepare_data(read_mode mode) { // There are no columns in the table. 
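// For orientation, the per-pass budgets derived from `pass_read_limit` via the ratios in
// reader_impl_chunking.hpp reduce to this standalone sketch; `load_budget` and `decode_budget`
// are hypothetical helpers (assuming <algorithm> and <cstddef>), and the production code applies
// the same never-zero guard before calling `find_splits`.
std::size_t load_budget(std::size_t pass_read_limit)
{
  // load_limit_ratio{0.25}: at most a quarter of the pass budget goes to raw stream loading.
  auto const tmp = static_cast<std::size_t>(0.25 * pass_read_limit);
  return std::max(tmp, std::size_t{1});  // never hand a 0-byte limit to find_splits
}
std::size_t decode_budget(std::size_t pass_read_limit)
{
  // decompress_and_decode_limit_ratio{0.6}: the bulk is reserved for decompression and decoding.
  auto const tmp = static_cast<std::size_t>(0.6 * pass_read_limit);
  return std::max(tmp, std::size_t{1});
}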
@@ -119,9 +120,7 @@ table_metadata reader_impl::get_meta_with_user_data() std::transform(meta.ff.metadata.cbegin(), meta.ff.metadata.cend(), std::inserter(kv_map, kv_map.end()), - [](auto const& kv) { - return std::pair{kv.name, kv.value}; - }); + [](auto const& kv) { return std::pair{kv.name, kv.value}; }); return kv_map; }); out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index a07ef5d917d..cd30e0e2b91 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -118,7 +118,7 @@ class reader_impl { * In this step, the metadata of all stripes in the data sources is parsed, and information about * data streams of the selected columns in all stripes are generated. If the reader has a data * read limit, sizes of these streams are used to split the list of all stripes into multiple - * subsets, each of which will be read into memory in the `load_next_stripe_data()` step. These + * subsets, each of which will be loaded into memory in the `load_next_stripe_data()` step. These * subsets are computed such that memory usage will be kept to be around a fixed size limit. * * @param mode Value indicating if the data sources are read all at once or chunk by chunk diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 39026783135..4d4e45718d0 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -220,7 +220,7 @@ template std::vector find_splits(host_span find_splits( host_span sizes, std::size_t total_count, std::size_t size_limit); -range get_range(host_span input_ranges, range const& selected_ranges) +inline range get_range(host_span input_ranges, range const& selected_ranges) { // The first and last range. auto const& first_range = input_ranges[selected_ranges.begin]; @@ -230,6 +230,11 @@ range get_range(host_span input_ranges, range const& selected_range return {first_range.begin, last_range.end}; } +// In this step, the metadata of all stripes in the data sources is parsed, and information about +// data streams of the selected columns in all stripes are generated. If the reader has a data +// read limit, sizes of these streams are used to split the list of all stripes into multiple +// subsets, each of which will be loaded into memory in the `load_next_stripe_data()` step. These +// subsets are computed such that memory usage will be kept to be around a fixed size limit. void reader_impl::preprocess_file(read_mode mode) { if (_file_itm_data.global_preprocessed) { return; } @@ -293,9 +298,8 @@ void reader_impl::preprocess_file(read_mode mode) auto& col_meta = *_col_meta; // - // Collect columns' types. + // Collect columns' types: // - for (std::size_t level = 0; level < num_levels; ++level) { lvl_stripe_sizes[level].resize(num_total_stripes); lvl_stripe_stream_ranges[level].resize(num_total_stripes); @@ -355,9 +359,9 @@ void reader_impl::preprocess_file(read_mode mode) auto const load_all_stripes = mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0; - // Accumulate data size for data streams in each stripe. - // This will be used for CHUNKED_READ mode only. - // If we are in READ_ALL mode, we do not need this since we just load all stripes. + // Accumulate data size for data streams in each stripe, used for chunking. + // This will be used only for CHUNKED_READ mode when there is a read limit. + // Otherwise, we do not need this since we just load all stripes. 
cudf::detail::hostdevice_vector total_stripe_sizes( load_all_stripes ? std::size_t{0} : num_total_stripes, _stream); @@ -459,6 +463,12 @@ void reader_impl::preprocess_file(read_mode mode) find_splits(total_stripe_sizes, num_total_stripes, load_limit); } +// If there is a data read limit, only a subset of stripes are read at a time such that +// their total data size does not exceed a fixed size limit. Then, the data is probed to +// estimate its uncompressed sizes, which are in turn used to split that stripe subset into +// smaller subsets, each of which to be decompressed and decoded in the next step +// `decompress_and_decode_stripes()`. This is to ensure that loading data from data sources +// together with decompression and decoding will be capped around the given data read limit. void reader_impl::load_next_stripe_data(read_mode mode) { if (_file_itm_data.has_no_data()) { return; } @@ -494,7 +504,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) // If we load data directly from sources into device memory, the loads are also async. // Thus, we need to make sure to sync all them at the end. - std::vector, std::size_t>> read_tasks; + std::vector, std::size_t>> device_read_tasks; // Range of the read info (offset, length) to read for the current being loaded stripes. auto const [read_begin, read_end] = @@ -507,7 +517,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); if (source_ptr->is_device_read_preferred(read_info.length)) { - read_tasks.push_back( + device_read_tasks.push_back( std::pair(source_ptr->device_read_async( read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), read_info.length)); @@ -524,11 +534,11 @@ void reader_impl::load_next_stripe_data(read_mode mode) } } - if (host_read_buffers.size() > 0) { + if (host_read_buffers.size() > 0) { // if there was host read _stream.synchronize(); - host_read_buffers.clear(); + host_read_buffers.clear(); // its data was copied to device memory after stream sync } - for (auto& task : read_tasks) { + for (auto& task : device_read_tasks) { // if there was device read CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } @@ -541,25 +551,32 @@ void reader_impl::load_next_stripe_data(read_mode mode) return count; }(); - // Decoding range is reset to start from the first position in `decode_stripe_ranges`. + // Decoding range needs to be reset to start from the first position in `decode_stripe_ranges`. _chunk_read_data.curr_decode_stripe_range = 0; + // The cudf's column size limit. auto constexpr column_size_limit = static_cast(std::numeric_limits::max()); - // Decode all loaded stripes if there is no read limit, or if we are in READ_ALL mode. + // Decode all loaded stripes if there is no read limit, or if we are in READ_ALL mode, + // and the number of loading rows is less than the column size limit. // In theory, we should just decode 'enough' stripes for output one table chunk, instead of // decoding all stripes like this, for better load-balancing and reduce memory usage. // However, we do not have any good way to know how many stripes are 'enough'. if ((mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0) && - // In addition to read limit, we also need to check if the the total number of - // rows in the loaded stripes exceeds column size limit. - // If that is the case, we cannot decode all stripes at once. 
+ // In addition to read limit, we also need to check if the total number of + // rows in the loaded stripes exceeds the column size limit. + // If that is the case, we cannot decode all stripes at once into a cudf table. num_loading_rows < column_size_limit) { _chunk_read_data.decode_stripe_ranges = {load_stripe_range}; return; } + // From here, we have reading mode that is either: + // - READ_ALL but the number of reading rows exceeds column size limit, or + // - CHUNKED_READ without read limit but the number of reading rows exceeds column size limit, or + // - CHUNKED_READ with a pass read limit. + // This is the post-processing step after we've done with splitting `load_stripe_range` into // `decode_stripe_ranges`. auto const add_range_offset = [stripe_start](std::vector& new_ranges) { @@ -573,7 +590,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) }; // Optimized code path when we do not have any read limit but the number of rows in the - // loaded stripes exceeds cudf's column size limit. + // loaded stripes exceeds column size limit. // Note that the values `max_uncompressed_size` for each stripe are not computed here. // Instead, they will be computed on the fly during decoding to avoid the overhead of // storing and retrieving from memory. @@ -587,9 +604,9 @@ void reader_impl::load_next_stripe_data(read_mode mode) auto const stripe_info = stripe.stripe_info; rows += stripe_info->numberOfRows; - // Here we will split stripe ranges based only on stripes' number of rows, not data size. + // We will split stripe ranges based only on stripes' number of rows, not data size. // Thus, we override the cumulative `size_bytes` using the prefix sum of rows in stripes and - // will use the column size limit (`std::numeric_limits::max()`) as split limit. + // will use the column size limit as the split size limit. cumulative_stripe_rows[idx] = cumulative_size_and_row{idx + 1UL /*count*/, rows /*size_bytes*/, rows}; } @@ -601,16 +618,16 @@ void reader_impl::load_next_stripe_data(read_mode mode) } // - // Split range of loaded stripes into subranges that can be decoded separately without blowing up - // memory: + // Split range of loaded stripes into subranges that can be decoded separately such that the + // memory usage is maintained around the given limit: // - // For estimating the decompressed sizes of the loaded stripes. + // This is for estimating the decompressed sizes of the loaded stripes. cudf::detail::hostdevice_vector stripe_decomp_sizes(stripe_count, _stream); // Fill up the `cumulative_size_and_row` array with initial values. - // Note: `hostdevice_vector::begin()` mirrors `std::vector::data()` using incorrect name. + // Note: `hostdevice_vector::begin()` mirrors `std::vector::data()` using incorrect API name. 
   for (std::size_t idx = 0; idx < stripe_count; ++idx) {
     auto const& stripe     = _file_itm_data.selected_stripes[idx + stripe_start];
     auto const stripe_info = stripe.stripe_info;

From 6c3bb4ff0ae15e66d61c4f098fe596ff271a5144 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Wed, 24 Apr 2024 09:54:02 -0700
Subject: [PATCH 303/321] Inline a small function

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 10 ----------
 cpp/src/io/orc/reader_impl_chunking.hpp | 26 ++++++++++++++++---------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 4d4e45718d0..9f1b6b137cb 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -220,16 +220,6 @@ template std::vector<range> find_splits<cumulative_size>(host_span<cumulative_size const>
 template std::vector<range> find_splits<cumulative_size_and_row>(
   host_span<cumulative_size_and_row const> sizes, std::size_t total_count, std::size_t size_limit);
 
-inline range get_range(host_span<range const> input_ranges, range const& selected_ranges)
-{
-  // The first and last range.
-  auto const& first_range = input_ranges[selected_ranges.begin];
-  auto const& last_range  = input_ranges[selected_ranges.end - 1];
-
-  // The range of data covered from the first to the last range.
-  return {first_range.begin, last_range.end};
-}
-
 // In this step, the metadata of all stripes in the data sources is parsed, and information about
 // data streams of the selected columns in all stripes are generated. If the reader has a data
 // read limit, sizes of these streams are used to split the list of all stripes into multiple
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index d7ddf9d50f9..6bc36002391 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -36,6 +36,23 @@ struct range {
   std::size_t end{0};
 };
 
+/**
+ * @brief Expand a range of ranges into a simple range of data.
+ *
+ * @param input_ranges The list of all data ranges
+ * @param selected_ranges A range of ranges from `input_ranges`
+ * @return The range of data spanned by the selected range of ranges
+ */
+inline range get_range(host_span<range const> input_ranges, range const& selected_ranges)
+{
+  // The first and last range.
+  auto const& first_range = input_ranges[selected_ranges.begin];
+  auto const& last_range  = input_ranges[selected_ranges.end - 1];
+
+  // The range of data covered from the first to the last range.
+  return {first_range.begin, last_range.end};
+}
+
 // Store information to identify where to read a chunk of data from source.
 // Each read corresponds to one or more consecutive streams combined.
 struct stream_data_read_info {
@@ -261,15 +278,6 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
                                std::size_t total_count,
                                std::size_t size_limit);
 
-/**
- * @brief Expand a range of ranges into a simple range of data.
- *
- * @param input_ranges The list of all data ranges
- * @param selected_ranges A range of ranges from `input_ranges`
- * @return The range of data span by the selected range of ranges
- */
-range get_range(host_span<range const> input_ranges, range const& selected_ranges);
-
 /**
  * @brief Function that populates descriptors for either individual streams or chunks of column
  * data, but not both.
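A side note on `find_splits`, whose declaration stays in the header above: it partitions a prefix-sum array of sizes into ranges that each stay under a soft size limit. The standalone sketch below models that greedy policy in simplified form; the real implementation in reader_impl_chunking.cu additionally binary-searches the cumulative sizes, tracks element counts, and merges an overly small trailing range, and every size literal here is invented purely for illustration.

// Simplified, standalone model of the find_splits() policy (not the actual cudf code).
#include <cstddef>
#include <cstdio>
#include <vector>

struct cumulative_size {
  std::size_t count{0};
  std::size_t size_bytes{0};  // prefix sum: total bytes up to and including this element
};
struct range {
  std::size_t begin{0};
  std::size_t end{0};
};

std::vector<range> find_splits_sketch(std::vector<cumulative_size> const& sizes,
                                      std::size_t size_limit)
{
  std::vector<range> splits;
  std::size_t cur_pos = 0, cur_cumulative = 0;
  while (cur_pos < sizes.size()) {
    // Greedily extend the current range while it stays within the size limit,
    // but always keep at least one element so that we make progress.
    auto split_pos = cur_pos;
    while (split_pos + 1 < sizes.size() &&
           sizes[split_pos + 1].size_bytes - cur_cumulative <= size_limit) {
      ++split_pos;
    }
    splits.push_back(range{cur_pos, split_pos + 1});
    cur_cumulative = sizes[split_pos].size_bytes;
    cur_pos        = split_pos + 1;
  }
  return splits;
}

int main()
{
  // Four stripes of 40, 60, 10, and 100 bytes, stored as prefix sums.
  std::vector<cumulative_size> const sizes{{1, 40}, {2, 100}, {3, 110}, {4, 210}};
  for (auto const [begin, end] : find_splits_sketch(sizes, /*size_limit=*/100)) {
    std::printf("[%zu, %zu)\n", begin, end);  // prints [0, 2), [2, 3), [3, 4)
  }
  return 0;
}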
From 673b03426670f92179152bc126278c8580a620aa Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Wed, 24 Apr 2024 16:03:10 -0700
Subject: [PATCH 304/321] Fix format

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index ae077886015..cf7b6fecbc6 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -120,7 +120,9 @@ table_metadata reader_impl::get_meta_with_user_data()
       std::transform(meta.ff.metadata.cbegin(),
                      meta.ff.metadata.cend(),
                      std::inserter(kv_map, kv_map.end()),
-                     [](auto const& kv) { return std::pair{kv.name, kv.value}; });
+                     [](auto const& kv) {
+                       return std::pair{kv.name, kv.value};
+                     });
       return kv_map;
     });
   out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(),

From 767e35fa5832b25b68c1dfbf5dbce530c7ccf489 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 25 Apr 2024 20:28:44 -0700
Subject: [PATCH 305/321] Allocate `null_count_prefix_sums` as just one buffer

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 87 +++++++++++++++-------------
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 5ab8516276d..e93ed357919 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -485,40 +485,38 @@ void decode_stream_data(int64_t num_dicts,
  * layer.
 */
 void scan_null_counts(cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> const& chunks,
-                      cudf::host_span<rmm::device_uvector<uint32_t>> prefix_sums,
+                      uint32_t* d_prefix_sums,
                       rmm::cuda_stream_view stream)
 {
   auto const num_stripes = chunks.size().first;
   if (num_stripes == 0) return;
 
   auto const num_columns = chunks.size().second;
-  std::vector<std::pair<size_type, cudf::device_span<uint32_t>>> prefix_sums_to_update;
+  std::vector<std::pair<size_type, uint32_t*>> prefix_sums_to_update;
   for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) {
     // Null counts sums are only needed for children of struct columns
     if (chunks[0][col_idx].type_kind == STRUCT) {
-      prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]);
+      prefix_sums_to_update.emplace_back(col_idx, d_prefix_sums + num_stripes * col_idx);
     }
   }
   auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async(
     prefix_sums_to_update, stream, rmm::mr::get_current_device_resource());
 
-  thrust::for_each(rmm::exec_policy_nosync(stream),
-                   d_prefix_sums_to_update.begin(),
-                   d_prefix_sums_to_update.end(),
-                   [chunks = cudf::detail::device_2dspan<gpu::ColumnDesc const>{chunks}] __device__(
-                     auto const& idx_psums) {
-                     auto const col_idx = idx_psums.first;
-                     auto const psums   = idx_psums.second;
-
-                     thrust::transform(
-                       thrust::seq,
-                       thrust::make_counting_iterator(0),
-                       thrust::make_counting_iterator(0) + psums.size(),
-                       psums.begin(),
-                       [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; });
-
-                     thrust::inclusive_scan(thrust::seq, psums.begin(), psums.end(), psums.begin());
-                   });
+  thrust::for_each(
+    rmm::exec_policy_nosync(stream),
+    d_prefix_sums_to_update.begin(),
+    d_prefix_sums_to_update.end(),
+    [num_stripes, chunks = cudf::detail::device_2dspan<gpu::ColumnDesc const>{chunks}] __device__(
+      auto const& idx_psums) {
+      auto const col_idx = idx_psums.first;
+      auto const psums   = idx_psums.second;
+      thrust::transform(thrust::seq,
+                        thrust::make_counting_iterator(0ul),
+                        thrust::make_counting_iterator(num_stripes),
+                        psums,
+                        [&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; });
+      thrust::inclusive_scan(thrust::seq, psums, psums + num_stripes, psums);
+    });
   // `prefix_sums_to_update` goes out of scope; the copy has to be done before we return
   stream.synchronize();
 }
 
@@ -763,7 +761,18 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
   std::vector<cudf::detail::hostdevice_2dvector<gpu::ColumnDesc>> lvl_chunks(num_levels);
 
   // For computing null count.
-  std::vector<std::vector<rmm::device_uvector<uint32_t>>> null_count_prefix_sums(num_levels);
+  auto null_count_prefix_sums = [&] {
+    auto const num_total_cols = std::accumulate(
+      _selected_columns.levels.begin(),
+      _selected_columns.levels.end(),
+      std::size_t{0},
+      [](auto const& sum, auto const& cols_level) { return sum + cols_level.size(); });
+
+    return cudf::detail::make_zeroed_device_uvector_async<uint32_t>(
+      num_total_cols * stripe_count, _stream, rmm::mr::get_current_device_resource());
+  }();
+  std::size_t num_processed_lvl_columns      = 0;
+  std::size_t num_processed_prev_lvl_columns = 0;
 
   // For parsing decompression data.
   // We create one hostdevice_vector that is large enough to use for all levels,
@@ -796,9 +805,9 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
     auto& stripe_data = _file_itm_data.lvl_stripe_data[level];
     auto& chunks      = lvl_chunks[level];
 
-    auto const num_level_columns = columns_level.size();
+    auto const num_lvl_columns = columns_level.size();
     chunks =
-      cudf::detail::hostdevice_2dvector<gpu::ColumnDesc>(stripe_count, num_level_columns, _stream);
+      cudf::detail::hostdevice_2dvector<gpu::ColumnDesc>(stripe_count, num_lvl_columns, _stream);
     memset(chunks.base_host_ptr(), 0, chunks.size_bytes());
 
     const bool use_index =
@@ -809,17 +818,11 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
       // TODO: Consider nrows, gpu, and tune the threshold
       (rows_to_decode > _metadata.get_row_index_stride() &&
        !(_metadata.get_row_index_stride() & 7) && _metadata.get_row_index_stride() != 0 &&
-       num_level_columns * stripe_count < 8 * 128) &&
+       num_lvl_columns * stripe_count < 8 * 128) &&
       // Only use if first row is aligned to a stripe boundary
       // TODO: Fix logic to handle unaligned rows
       (rows_to_skip == 0);
 
-    null_count_prefix_sums[level].reserve(num_level_columns);
-    std::generate_n(std::back_inserter(null_count_prefix_sums[level]), num_level_columns, [&]() {
-      return cudf::detail::make_zeroed_device_uvector_async<uint32_t>(
-        stripe_count, _stream, rmm::mr::get_current_device_resource());
-    });
-
     // 0-based counters, used across all decoding stripes in this step.
     int64_t stripe_start_row{0};
     int64_t num_dict_entries{0};
@@ -863,24 +866,25 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
         : 0;
 
       // Update chunks to reference streams pointers.
-      for (std::size_t col_idx = 0; col_idx < num_level_columns; col_idx++) {
+      for (std::size_t col_idx = 0; col_idx < num_lvl_columns; col_idx++) {
        auto& chunk = chunks[stripe_local_idx][col_idx];
 
         // start row, number of rows in each stripe and total number of rows
         // may change in lower levels of nesting
         chunk.start_row =
           (level == 0)
             ? stripe_start_row
-            : col_meta.child_start_row[stripe_local_idx * num_level_columns + col_idx];
+            : col_meta.child_start_row[stripe_local_idx * num_lvl_columns + col_idx];
         chunk.num_rows =
           (level == 0)
             ? num_rows_in_stripe
-            : col_meta.num_child_rows_per_stripe[stripe_local_idx * num_level_columns + col_idx];
+            : col_meta.num_child_rows_per_stripe[stripe_local_idx * num_lvl_columns + col_idx];
         chunk.column_num_rows = (level == 0) ? rows_to_decode : col_meta.num_child_rows[col_idx];
         chunk.parent_validity_info =
           (level == 0) ? column_validity_info{} : col_meta.parent_column_data[col_idx];
         chunk.parent_null_count_prefix_sums =
-          (level == 0)
-            ? nullptr
-            : null_count_prefix_sums[level - 1][col_meta.parent_column_index[col_idx]].data();
+          (level == 0) ? nullptr
+                       : null_count_prefix_sums.data() + (num_processed_prev_lvl_columns +
+                                                          col_meta.parent_column_index[col_idx]) *
+                                                           stripe_count;
         chunk.encoding_kind = stripe_footer->columns[columns_level[col_idx].id].kind;
         chunk.type_kind =
           _metadata.per_file_metadata[stripe.source_idx].ff.types[columns_level[col_idx].id].kind;
@@ -921,10 +925,10 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
 
     // Process dataset chunks into output columns.
     auto row_groups =
-      cudf::detail::hostdevice_2dvector<gpu::RowGroup>(num_rowgroups, num_level_columns, _stream);
+      cudf::detail::hostdevice_2dvector<gpu::RowGroup>(num_rowgroups, num_lvl_columns, _stream);
     if (level > 0 and row_groups.size().first) {
       cudf::host_span<gpu::RowGroup> row_groups_span(row_groups.base_host_ptr(),
-                                                     num_rowgroups * num_level_columns);
+                                                     num_rowgroups * num_lvl_columns);
       auto& rw_grp_meta = col_meta.rwgrp_meta;
 
       // Update start row and num rows per row group
@@ -971,7 +975,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
       gpu::ParseRowGroupIndex(row_groups.base_device_ptr(),
                               nullptr,
                               chunks.base_device_ptr(),
-                              num_level_columns,
+                              num_lvl_columns,
                               stripe_count,
                               _metadata.get_row_index_stride(),
                               level == 0,
@@ -1011,7 +1015,8 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
 
     if (nested_cols.size()) {
       // Extract information to process nested child columns.
-      scan_null_counts(chunks, null_count_prefix_sums[level], _stream);
+      scan_null_counts(
+        chunks, null_count_prefix_sums.data() + num_processed_lvl_columns * stripe_count, _stream);
 
       row_groups.device_to_host_sync(_stream);
       aggregate_child_meta(
@@ -1029,6 +1034,8 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
 
       if (not buff_data.empty()) { generate_offsets_for_list(buff_data, _stream); }
     }
+    num_processed_prev_lvl_columns = num_processed_lvl_columns;
+    num_processed_lvl_columns += num_lvl_columns;
   }  // end loop level
 
   // Now generate a table from the decoded result.

From ca15afce96eb91ef92db1ca4459dacfea5cffad6 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 25 Apr 2024 20:35:56 -0700
Subject: [PATCH 306/321] Change initialization style

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 1f73374ac20..19810a7d5f1 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -30,7 +30,7 @@ namespace {
 // run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
-constexpr std::size_t Mbytes{1024 * 1024};
+constexpr std::size_t Mbytes = 1024 * 1024;

From f34b7b62fb4d7c284085dabef502835866e851cd Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 25 Apr 2024 20:36:02 -0700
Subject: [PATCH 307/321] Change comment

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_decode.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index e93ed357919..3a07007de96 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -757,7 +757,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
 
   // Column descriptors ('chunks').
   // Each 'chunk' of data here corresponds to an orc column, in a stripe, at a nested level.
   // Unfortunately we cannot create one hostdevice_vector to use for all levels because
-  // currently we do not have hostdevice_2dspan exists.
+  // currently we do not have a hostdevice_2dspan class.
   std::vector<cudf::detail::hostdevice_2dvector<gpu::ColumnDesc>> lvl_chunks(num_levels);
 
   // For computing null count.

From 1d19ede1c80ea83c3ecea36a24cf2d68f2f90353 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 25 Apr 2024 20:36:13 -0700
Subject: [PATCH 308/321] Reserve vector

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/aggregate_orc_metadata.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp
index ac0dd10856c..0bc1c2209d4 100644
--- a/cpp/src/io/orc/aggregate_orc_metadata.cpp
+++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp
@@ -183,6 +183,7 @@ aggregate_orc_metadata::select_stripes(
     // user_defined_stripes to get from that source file
     for (size_t src_file_idx = 0; src_file_idx < user_specified_stripes.size(); ++src_file_idx) {
       std::vector stripe_infos;
+      stripe_infos.reserve(user_specified_stripes[src_file_idx].size());
 
       // Coalesce stripe info at the source file later since that makes downstream processing much
       // easier in impl::read
@@ -213,6 +214,7 @@ aggregate_orc_metadata::select_stripes(
          src_file_idx < per_file_metadata.size() && count < rows_to_skip + rows_to_read;
          ++src_file_idx) {
       std::vector stripe_infos;
+      stripe_infos.reserve(per_file_metadata[src_file_idx].ff.stripes.size());
 
       for (size_t stripe_idx = 0;
            stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size() &&
           count < rows_to_skip + rows_to_read;

From 3ed1e2e4f501f2e8b178b9cd702728def0eb4bb9 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 25 Apr 2024 20:54:29 -0700
Subject: [PATCH 309/321] Change variable order

Signed-off-by: Nghia Truong
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 19810a7d5f1..b7c214a8374 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -28,8 +28,8 @@ namespace {
 
 // Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
 // run on most GPUs, but large enough to allow highest throughput
-constexpr int64_t data_size = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
+constexpr std::size_t data_size = 512 << 20;
 constexpr std::size_t Mbytes = 1024 * 1024;

From 633558665707efaa7f4fb702e036ba3aa84d773e Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:05:51 -0700
Subject: [PATCH 310/321] Move data to output

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/aggregate_orc_metadata.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp
index 0bc1c2209d4..94a4d146b35 100644
--- a/cpp/src/io/orc/aggregate_orc_metadata.cpp
+++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp
@@ -267,7 +267,9 @@ aggregate_orc_metadata::select_stripes(
       if (stripe->indexLength == 0) { row_grp_idx_present = false; }
     }
 
-    output.insert(output.end(), mapping.stripe_info.begin(), mapping.stripe_info.end());
+    output.insert(output.end(),
+                  std::make_move_iterator(mapping.stripe_info.begin()),
+                  std::make_move_iterator(mapping.stripe_info.end()));
   }
 
   return {rows_to_skip, rows_to_read, std::move(output)};
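A brief note on the `std::make_move_iterator` change above: with plain iterators, `insert()` copy-constructs each element into `output`, while the wrapped iterators make it move-construct the elements instead, which matters once they own heap allocations. A minimal standalone illustration, with `std::string` standing in for the stripe-info elements (which are not shown here):

#include <iterator>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> src{"stripe0", "stripe1"};
  std::vector<std::string> out;

  // Each element is move-constructed into `out`; the strings in `src` are left
  // in a valid but unspecified (typically empty) state.
  out.insert(out.end(),
             std::make_move_iterator(src.begin()),
             std::make_move_iterator(src.end()));

  return out.size() == 2 ? 0 : 1;
}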
From 437c9c0c55c675d8de0dd3c6c22325efbf6281ef Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:06:00 -0700
Subject: [PATCH 311/321] Rename function

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 6 +++---
 cpp/src/io/orc/reader_impl_chunking.hpp | 7 ++++---
 cpp/src/io/orc/reader_impl_decode.cu    | 4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 9f1b6b137cb..9925982f3e7 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -498,7 +498,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
 
   // Range of the read info (offset, length) to read for the stripes currently being loaded.
   auto const [read_begin, read_end] =
-    get_range(_file_itm_data.stripe_data_read_ranges, load_stripe_range);
+    merge_selected_ranges(_file_itm_data.stripe_data_read_ranges, load_stripe_range);
 
   for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) {
     auto const& read_info = _file_itm_data.data_read_info[read_idx];
@@ -637,7 +637,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
     // Find the maximum number of streams in all levels of the loaded stripes.
     for (std::size_t level = 0; level < num_levels; ++level) {
       auto const stream_range =
-        get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
+        merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
       auto const num_streams = stream_range.end - stream_range.begin;
       max_num_streams        = std::max(max_num_streams, num_streams);
     }
@@ -654,7 +654,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
 
     // Range of all streams in the loaded stripes.
     auto const stream_range =
-      get_range(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
+      merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
     auto const num_streams = stream_range.end - stream_range.begin;
 
     if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 6bc36002391..9bc1adcb294 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -43,7 +43,8 @@ struct range {
  * @param selected_ranges A range of ranges from `input_ranges`
  * @return The range of data spanned by the selected range of ranges
 */
-inline range get_range(host_span<range const> input_ranges, range const& selected_ranges)
+inline range merge_selected_ranges(host_span<range const> input_ranges,
+                                   range const& selected_ranges)
 {
   // The first and last range.
   auto const& first_range = input_ranges[selected_ranges.begin];
@@ -128,8 +129,8 @@ struct file_intermediate_data {
   int64_t rows_to_read;
   std::vector selected_stripes;
 
-  // Return true if no rows or stripes to read.
-  bool has_no_data() const { return rows_to_read == 0 || selected_stripes.empty(); }
+  // Check if there is data to read.
+  bool has_data() const { return rows_to_read > 0 && !selected_stripes.empty(); }
 
   // For each stripe, we perform a number of reads for its streams.
   // Those reads are identified by a chunk of consecutive read info stored in `data_read_info`.
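For clarity, a standalone usage sketch of the `merge_selected_ranges` helper defined above; `std::vector` stands in for `host_span` so the snippet compiles on its own, and the stream counts are invented:

#include <cstddef>
#include <vector>

struct range {
  std::size_t begin{0};
  std::size_t end{0};
};

// Same logic as the inlined helper: collapse a range of ranges into one flat range.
range merge_selected_ranges(std::vector<range> const& input_ranges, range const& selected)
{
  return {input_ranges[selected.begin].begin, input_ranges[selected.end - 1].end};
}

int main()
{
  // Stripe 0 owns stream reads [0, 3); stripe 1 owns stream reads [3, 5).
  std::vector<range> const stripe_stream_ranges{{0, 3}, {3, 5}};
  // Selecting the stripe range [0, 2) must therefore cover stream reads [0, 5).
  auto const r = merge_selected_ranges(stripe_stream_ranges, {0, 2});
  return (r.begin == 0 && r.end == 5) ? 0 : 1;
}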
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 3a07007de96..7eb2240fade 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -783,7 +783,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode) // Find the maximum number of streams in all levels of the decoding stripes. for (std::size_t level = 0; level < num_levels; ++level) { auto const stream_range = - get_range(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range); + merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range); auto const num_streams = stream_range.end - stream_range.begin; max_num_streams = std::max(max_num_streams, num_streams); } @@ -794,7 +794,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode) auto& col_meta = *_col_meta; for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) { auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level]; - auto const stream_range = get_range(stripe_stream_ranges, stripe_range); + auto const stream_range = merge_selected_ranges(stripe_stream_ranges, stripe_range); auto const num_streams = stream_range.end - stream_range.begin; auto const& columns_level = _selected_columns.levels[level]; From cc174bbe629f271261cd602cea013bb7a5644577 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Apr 2024 13:06:46 -0700 Subject: [PATCH 312/321] Change `has_no_data` into `has_data` Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 4 ++-- cpp/src/io/orc/reader_impl_decode.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9925982f3e7..7dffd334a68 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -237,7 +237,7 @@ void reader_impl::preprocess_file(read_mode mode) _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) = _metadata.select_stripes( _config.selected_stripes, _config.skip_rows, _config.num_read_rows, _stream); - if (_file_itm_data.has_no_data()) { return; } + if (!_file_itm_data.has_data()) { return; } CUDF_EXPECTS( mode == read_mode::CHUNKED_READ || @@ -461,7 +461,7 @@ void reader_impl::preprocess_file(read_mode mode) // together with decompression and decoding will be capped around the given data read limit. 
 void reader_impl::load_next_stripe_data(read_mode mode)
 {
-  if (_file_itm_data.has_no_data()) { return; }
+  if (!_file_itm_data.has_data()) { return; }
 
   auto const load_stripe_range =
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 7eb2240fade..5387aced269 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -710,7 +710,7 @@ std::vector<range> find_table_splits(table_view const& input,
 
 void reader_impl::decompress_and_decode_stripes(read_mode mode)
 {
-  if (_file_itm_data.has_no_data()) { return; }
+  if (!_file_itm_data.has_data()) { return; }
 
   CUDF_EXPECTS(_chunk_read_data.curr_load_stripe_range > 0, "No stripe has been loaded.");

From 1d19ede1c80ea83c3ecea36a24cf2d68f2f90353 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:11:47 -0700
Subject: [PATCH 313/321] Rename variable

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 4 ++--
 cpp/src/io/orc/reader_impl_chunking.hpp | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 7dffd334a68..fbb98548068 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -170,7 +170,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
       if constexpr (std::is_same_v<T, cumulative_size_and_row>) {
         // Similarly, while the returned range's total number of rows exceeds the column size
         // limit, move back one position.
-        while (split_pos > 0 && cumulative_sizes[split_pos].rows >
+        while (split_pos > 0 && cumulative_sizes[split_pos].num_rows >
                                   cur_cumulative_rows +
                                     static_cast<std::size_t>(std::numeric_limits<size_type>::max())) {
           split_pos--;
@@ -192,7 +192,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
     cur_cumulative_size = cumulative_sizes[split_pos].size_bytes;
     if constexpr (std::is_same_v<T, cumulative_size_and_row>) {
-      cur_cumulative_rows = cumulative_sizes[split_pos].rows;
+      cur_cumulative_rows = cumulative_sizes[split_pos].num_rows;
     }
   }
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 9bc1adcb294..e20200cfeea 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -34,6 +34,8 @@ namespace cudf::io::orc::detail {
 struct range {
   std::size_t begin{0};
   std::size_t end{0};
+
+  [[nodiscard]] auto size() const { return end - begin; }
 };
 
 /**
@@ -243,7 +245,7 @@ struct cumulative_size {
 struct cumulative_size_and_row {
   std::size_t count{0};
   std::size_t size_bytes{0};
-  std::size_t rows{0};
+  std::size_t num_rows{0};
 };
 
 /**
@@ -258,7 +260,8 @@ struct cumulative_size_plus {
   __device__ cumulative_size_and_row operator()(cumulative_size_and_row const& a,
                                                 cumulative_size_and_row const& b) const
   {
-    return cumulative_size_and_row{a.count + b.count, a.size_bytes + b.size_bytes, a.rows + b.rows};
+    return cumulative_size_and_row{
+      a.count + b.count, a.size_bytes + b.size_bytes, a.num_rows + b.num_rows};
   }
 };

From ad6923665b92f30f7ae5aefb826dc1e02f29234b Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:25:43 -0700
Subject: [PATCH 314/321] Implement `size` for range

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu | 12 ++++--------
 cpp/src/io/orc/reader_impl_decode.cu   |  8 +++-----
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index fbb98548068..1e57139d0fb 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -202,8 +202,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
   if (splits.size() > 1) {
     double constexpr merge_threshold = 0.15;
     if (auto const last = splits.back(), second_last = splits[splits.size() - 2];
-        (last.end - last.begin) <=
-          static_cast<std::size_t>(merge_threshold * (second_last.end - second_last.begin))) {
+        last.size() <= static_cast<std::size_t>(merge_threshold * second_last.size())) {
       splits.pop_back();
       splits.back().end = last.end;
     }
@@ -466,8 +465,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
   auto const load_stripe_range =
     _chunk_read_data.load_stripe_ranges[_chunk_read_data.curr_load_stripe_range++];
   auto const stripe_start = load_stripe_range.begin;
-  auto const stripe_end   = load_stripe_range.end;
-  auto const stripe_count = stripe_end - stripe_start;
+  auto const stripe_count = load_stripe_range.size();
 
   auto& lvl_stripe_data  = _file_itm_data.lvl_stripe_data;
   auto const num_levels  = _selected_columns.num_levels();
@@ -638,8 +636,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
     for (std::size_t level = 0; level < num_levels; ++level) {
       auto const stream_range =
         merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
-      auto const num_streams = stream_range.end - stream_range.begin;
-      max_num_streams        = std::max(max_num_streams, num_streams);
+      max_num_streams = std::max(max_num_streams, stream_range.size());
     }
   }
   return cudf::detail::hostdevice_vector<gpu::CompressedStreamInfo>(max_num_streams, _stream);
@@ -655,13 +652,12 @@ void reader_impl::load_next_stripe_data(read_mode mode)
 
   // Range of all streams in the loaded stripes.
   auto const stream_range =
     merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], load_stripe_range);
-  auto const num_streams = stream_range.end - stream_range.begin;
 
   if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
     auto const& decompressor = *_metadata.per_file_metadata[0].decompressor;
 
     auto compinfo = cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>(
-      hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams);
+      hd_compinfo.begin(), hd_compinfo.d_begin(), stream_range.size());
     for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) {
       auto const& info = stream_info[stream_idx];
       auto const dst_base =
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 5387aced269..e1fd61cd582 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -718,7 +718,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
     _chunk_read_data.decode_stripe_ranges[_chunk_read_data.curr_decode_stripe_range++];
   auto const stripe_start = stripe_range.begin;
   auto const stripe_end   = stripe_range.end;
-  auto const stripe_count = stripe_range.end - stripe_range.begin;
+  auto const stripe_count = stripe_range.size();
 
   // The start index of loaded stripes. They are different from decoding stripes.
   auto const load_stripe_range =
@@ -784,8 +784,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
     for (std::size_t level = 0; level < num_levels; ++level) {
       auto const stream_range =
         merge_selected_ranges(_file_itm_data.lvl_stripe_stream_ranges[level], stripe_range);
-      auto const num_streams = stream_range.end - stream_range.begin;
-      max_num_streams        = std::max(max_num_streams, num_streams);
+      max_num_streams = std::max(max_num_streams, stream_range.size());
     }
   }
   return cudf::detail::hostdevice_vector<gpu::CompressedStreamInfo>{max_num_streams, _stream};
@@ -795,7 +794,6 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
   for (std::size_t level = 0; level < _selected_columns.num_levels(); ++level) {
     auto const& stripe_stream_ranges = _file_itm_data.lvl_stripe_stream_ranges[level];
     auto const stream_range          = merge_selected_ranges(stripe_stream_ranges, stripe_range);
-    auto const num_streams           = stream_range.end - stream_range.begin;
 
     auto const& columns_level = _selected_columns.levels[level];
     auto const& stream_info   = _file_itm_data.lvl_stream_info[level];
@@ -946,7 +944,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
     // Setup row group descriptors if using indexes.
     if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
       auto compinfo = cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>(
-        hd_compinfo.begin(), hd_compinfo.d_begin(), num_streams);
+        hd_compinfo.begin(), hd_compinfo.d_begin(), stream_range.size());
       auto decomp_data = decompress_stripe_data(load_stripe_range,
                                                 stream_range,
                                                 stripe_count,

From 890abb4e970eab7dc7ec059292e7089db1eccd51 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:27:19 -0700
Subject: [PATCH 315/321] Change docs

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index e20200cfeea..0b38a11e2f5 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -29,7 +29,7 @@ namespace cudf::io::orc::detail {
 
 /**
- * @brief Struct representing a range of data.
+ * @brief Struct representing a range of data offsets.
 */
 struct range {
   std::size_t begin{0};

From cb21a6df264291900aa50464837844ee7ed3cef7 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:28:40 -0700
Subject: [PATCH 316/321] Rename `_config` into `_options`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.cu          | 22 +++++++++++-----------
 cpp/src/io/orc/reader_impl.hpp         |  2 +-
 cpp/src/io/orc/reader_impl_chunking.cu |  8 ++++----
 cpp/src/io/orc/reader_impl_decode.cu   |  4 ++--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index cf7b6fecbc6..621d4c67691 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -66,9 +66,9 @@ table_with_metadata reader_impl::make_output_chunk()
         out_metadata.schema_info.emplace_back("");
         return create_empty_column(col_meta.id,
                                    _metadata,
-                                   _config.decimal128_columns,
-                                   _config.use_np_dtypes,
-                                   _config.timestamp_type,
+                                   _options.decimal128_columns,
+                                   _options.use_np_dtypes,
+                                   _options.timestamp_type,
                                    out_metadata.schema_info.back(),
                                    _stream);
       });
@@ -167,13 +167,13 @@ reader_impl::reader_impl(std::size_t chunk_read_limit,
                          rmm::device_async_resource_ref mr)
   : _stream(stream),
     _mr(mr),
-    _config{options.get_timestamp_type(),
-            options.is_enabled_use_index(),
-            options.is_enabled_use_np_dtypes(),
-            options.get_decimal128_columns(),
-            options.get_skip_rows(),
-            options.get_num_rows(),
-            options.get_stripes()},
+    _options{options.get_timestamp_type(),
+             options.is_enabled_use_index(),
+             options.is_enabled_use_np_dtypes(),
+             options.get_decimal128_columns(),
+             options.get_skip_rows(),
+             options.get_num_rows(),
+             options.get_stripes()},
     _col_meta{std::make_unique<reader_column_meta>()},
     _sources(std::move(sources)),
     _metadata{_sources, stream},
@@ -182,7 +182,7 @@ reader_impl::reader_impl(std::size_t chunk_read_limit,
 {
   // Selected columns at different levels of nesting are stored in different elements
   // of `selected_columns`; thus, size == 1 means no nested columns.
-  CUDF_EXPECTS(_config.skip_rows == 0 or _selected_columns.num_levels() == 1,
+  CUDF_EXPECTS(_options.skip_rows == 0 or _selected_columns.num_levels() == 1,
                "skip_rows is not supported by nested column");
 }

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index cd30e0e2b91..4f433653e1b 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -178,7 +178,7 @@ class reader_impl {
     int64_t const skip_rows;
     std::optional<int64_t> num_read_rows;
     std::vector<std::vector<size_type>> const selected_stripes;
-  } const _config;
+  } const _options;
 
   // Intermediate data for reading.
   std::unique_ptr<reader_column_meta> const _col_meta;  // Keeps track of orc mapping and child details
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 1e57139d0fb..98caa677aee 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -235,7 +235,7 @@ void reader_impl::preprocess_file(read_mode mode)
   std::tie(
     _file_itm_data.rows_to_skip, _file_itm_data.rows_to_read, _file_itm_data.selected_stripes) =
     _metadata.select_stripes(
-      _config.selected_stripes, _config.skip_rows, _config.num_read_rows, _stream);
+      _options.selected_stripes, _options.skip_rows, _options.num_read_rows, _stream);
   if (!_file_itm_data.has_data()) { return; }
 
   CUDF_EXPECTS(
@@ -305,9 +305,9 @@ void reader_impl::preprocess_file(read_mode mode)
 
     auto const col_type =
       to_cudf_type(_metadata.get_col_type(col.id).kind,
-                   _config.use_np_dtypes,
-                   _config.timestamp_type.id(),
-                   to_cudf_decimal_type(_config.decimal128_columns, _metadata, col.id));
+                   _options.use_np_dtypes,
+                   _options.timestamp_type.id(),
+                   to_cudf_decimal_type(_options.decimal128_columns, _metadata, col.id));
     CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type");
 
     auto& column_types = lvl_column_types[level];
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index e1fd61cd582..1e00779506c 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -809,7 +809,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
     memset(chunks.base_host_ptr(), 0, chunks.size_bytes());
 
     const bool use_index =
-      _config.use_index &&
+      _options.use_index &&
       // Do stripes have row group index
       _metadata.is_row_grp_idx_present() &&
       // Only use if we don't have much work with complete columns & stripes
@@ -905,7 +905,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
       chunk.num_rowgroups = stripe_num_rowgroups;
       if (chunk.type_kind == orc::TIMESTAMP) {
-        chunk.timestamp_type_id = _config.timestamp_type.id();
+        chunk.timestamp_type_id = _options.timestamp_type.id();
       }
       if (not is_stripe_data_empty) {
         for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) {

From 4e64eb708e94efe156572f336ff8330de5c5b7a6 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:33:23 -0700
Subject: [PATCH 317/321] Change comments

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl.hpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 4f433653e1b..94b294087b8 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -194,6 +194,9 @@ class reader_impl {
   std::vector<std::vector<column_buffer>> _out_buffers;
 
   // The default value used for subdividing the decoded table for final output.
+  // Larger values will reduce the computation time but will make the output table less granular.
+  // Smaller values (minimum is `1`) will increase the computation time but the output tables will
+  // have sizes closer to the given `chunk_read_limit`.
   static inline constexpr size_type DEFAULT_OUTPUT_ROW_GRANULARITY = 10'000;
 };

From d42ed14487442eadfb787a896def8b509ebd9af8 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Mon, 29 Apr 2024 13:34:41 -0700
Subject: [PATCH 318/321] Change `cumulative_size_and_row` to subclass `cumulative_size`

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.hpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 96751876751..0ba61004e10 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -242,9 +242,7 @@ struct cumulative_size {
  * @brief Struct to accumulate counts, sizes, and number of rows of some types such as stripes or
  * rows in tables.
 */
-struct cumulative_size_and_row {
-  std::size_t count{0};
-  std::size_t size_bytes{0};
+struct cumulative_size_and_row : public cumulative_size {
   std::size_t num_rows{0};
 };
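A side note on the subclassing above: under C++17 aggregate rules, a struct with a public base class and no user-provided constructors remains an aggregate, so both the nested-brace form and the brace-elided flat form (which `cumulative_size_plus::operator()` continues to use after this change) stay valid. A minimal standalone check:

#include <cstddef>

struct cumulative_size {
  std::size_t count{0};
  std::size_t size_bytes{0};
};
struct cumulative_size_and_row : public cumulative_size {
  std::size_t num_rows{0};
};

int main()
{
  cumulative_size_and_row a{{1, 1024}, 100};  // explicit base-class initializer
  cumulative_size_and_row b{1, 1024, 100};    // brace elision, as in cumulative_size_plus
  return (a.size_bytes == b.size_bytes && a.num_rows == b.num_rows) ? 0 : 1;
}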
From a0ca33378ec39d90325154d21f5b359782abcc80 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Wed, 1 May 2024 21:34:14 -0700
Subject: [PATCH 319/321] Address some review comments

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu  | 18 ++++++++----------
 cpp/src/io/orc/reader_impl_chunking.hpp |  2 +-
 cpp/src/io/orc/reader_impl_decode.cu    | 21 ++++++++++-----------
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 98caa677aee..eecef1b0334 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -142,7 +142,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
                                std::size_t total_count,
                                std::size_t size_limit)
 {
-  CUDF_EXPECTS(size_limit > 0, "Invalid size limit");
+  CUDF_EXPECTS(size_limit > 0, "Invalid size limit", std::invalid_argument);
 
   std::vector<range> splits;
   std::size_t cur_count{0};
@@ -445,7 +445,7 @@ void reader_impl::preprocess_file(read_mode mode)
     auto const tmp = static_cast<std::size_t>(_chunk_read_data.pass_read_limit *
                                               chunk_read_data::load_limit_ratio);
     // Make sure not to pass 0 byte limit (due to round-off) to `find_splits`.
-    return tmp > 0UL ? tmp : 1UL;
+    return std::max(tmp, 1UL);
   }();
 
   _chunk_read_data.load_stripe_ranges =
@@ -531,13 +531,11 @@ void reader_impl::load_next_stripe_data(read_mode mode)
   }
 
   // Compute the number of rows in the stripes being loaded.
-  auto const num_loading_rows = [&] {
-    std::size_t count{0};
-    for (std::size_t idx = 0; idx < stripe_count; ++idx) {
-      count += _file_itm_data.selected_stripes[idx + stripe_start].stripe_info->numberOfRows;
-    }
-    return count;
-  }();
+  auto const num_loading_rows = std::accumulate(
+    _file_itm_data.selected_stripes.begin() + stripe_start,
+    _file_itm_data.selected_stripes.begin() + stripe_start + stripe_count,
+    std::size_t{0},
+    [](std::size_t count, const auto& stripe) { return count + stripe.stripe_info->numberOfRows; });
@@ -709,7 +707,7 @@ void reader_impl::load_next_stripe_data(read_mode mode)
    auto const tmp = static_cast<std::size_t>(_chunk_read_data.pass_read_limit *
                                 chunk_read_data::decompress_and_decode_limit_ratio);
     // Make sure not to pass 0 byte limit to `find_splits`.
-    return tmp > 0UL ? tmp : 1UL;
+    return std::max(tmp, 1UL);
   }();
 
   _chunk_read_data.decode_stripe_ranges =
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 96751876751..0ba61004e10 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -272,7 +272,7 @@ struct cumulative_size_plus {
 *
  * @param cumulative_sizes The input cumulative sizes to compute split ranges
  * @param total_count The total count in the entire input
- * @param size_limit The given soft limit to compute splits
+ * @param size_limit The given soft limit to compute splits; must be positive
 * @return A vector of ranges as splits of the input
 */
 template
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index 1e00779506c..ec936b85761 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -88,7 +88,7 @@ rmm::device_buffer decompress_stripe_data(
   rmm::cuda_stream_view stream)
 {
   // Whether we have the compression info precomputed.
-  auto const compinfo_ready = compinfo_map.size() > 0;
+  auto const compinfo_ready = not compinfo_map.empty();
 
   // Count the exact number of compressed blocks
   std::size_t num_compressed_blocks = 0;
@@ -667,8 +667,11 @@ std::vector<range> find_table_splits(table_view const& input,
                                      std::size_t size_limit,
                                      rmm::cuda_stream_view stream)
 {
-  CUDF_EXPECTS(size_limit > 0, "Invalid size limit");
-  CUDF_EXPECTS(segment_length > 0, "Invalid segment_length");
+  if (size_limit == 0) {
+    return std::vector<range>{range{0, static_cast<std::size_t>(input.num_rows())}};
+  }
+
+  CUDF_EXPECTS(segment_length > 0, "Invalid segment_length", std::invalid_argument);
 
   // `segmented_row_bit_count` requires that `segment_length` is not larger than number of rows.
   segment_length = std::min(segment_length, input.num_rows());
@@ -1070,14 +1073,10 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)
 
   // Split the decoded table into ranges that can be output as chunks whose sizes stay within the
   // given output size limit.
-  _chunk_read_data.output_table_ranges =
-    _chunk_read_data.chunk_read_limit == 0
-      ? std::vector<range>{range{
-          0, static_cast<std::size_t>(_chunk_read_data.decoded_table->num_rows())}}
-      : find_table_splits(_chunk_read_data.decoded_table->view(),
-                          _chunk_read_data.output_row_granularity,
-                          _chunk_read_data.chunk_read_limit,
-                          _stream);
+  _chunk_read_data.output_table_ranges = find_table_splits(_chunk_read_data.decoded_table->view(),
+                                                           _chunk_read_data.output_row_granularity,
+                                                           _chunk_read_data.chunk_read_limit,
+                                                           _stream);
 }
 
 }  // namespace cudf::io::orc::detail

From cf18e4c0481e9831fe31c5649dd5753c72fc42a1 Mon Sep 17 00:00:00 2001
From: Nghia Truong
Date: Thu, 2 May 2024 13:04:32 -0700
Subject: [PATCH 320/321] Remove handling for `READ_ALL` when number of rows
 exceeds 2B rows

Signed-off-by: Nghia Truong
---
 cpp/src/io/orc/reader_impl_chunking.cu | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index eecef1b0334..1a682ccbeef 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -241,7 +241,8 @@ void reader_impl::preprocess_file(read_mode mode)
   CUDF_EXPECTS(
     mode == read_mode::CHUNKED_READ ||
       _file_itm_data.rows_to_read <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
-    "READ_ALL mode does not support reading number of rows more than cudf's column size limit.",
+    "READ_ALL mode does not support reading number of rows more than cudf's column size limit. "
" + "For reading large number of rows, please use chunked_reader.", std::overflow_error); auto const& selected_stripes = _file_itm_data.selected_stripes; @@ -553,15 +554,19 @@ void reader_impl::load_next_stripe_data(read_mode mode) // In addition to read limit, we also need to check if the total number of // rows in the loaded stripes exceeds the column size limit. // If that is the case, we cannot decode all stripes at once into a cudf table. - num_loading_rows < column_size_limit) { + num_loading_rows <= column_size_limit) { _chunk_read_data.decode_stripe_ranges = {load_stripe_range}; return; } // From here, we have reading mode that is either: - // - READ_ALL but the number of reading rows exceeds column size limit, or // - CHUNKED_READ without read limit but the number of reading rows exceeds column size limit, or // - CHUNKED_READ with a pass read limit. + // READ_ALL mode with number of rows more than cudf's column size limit should be handled early in + // `preprocess_file`. We just check again to make sure such situations never happen here. + CUDF_EXPECTS( + mode != read_mode::READ_ALL, + "READ_ALL mode does not support reading number of rows more than cudf's column size limit."); // This is the post-processing step after we've done with splitting `load_stripe_range` into // `decode_stripe_ranges`. @@ -580,8 +585,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) // Note that the values `max_uncompressed_size` for each stripe are not computed here. // Instead, they will be computed on the fly during decoding to avoid the overhead of // storing and retrieving from memory. - if ((mode == read_mode::READ_ALL || _chunk_read_data.pass_read_limit == 0) && - num_loading_rows >= column_size_limit) { + if (_chunk_read_data.pass_read_limit == 0 && num_loading_rows > column_size_limit) { std::vector cumulative_stripe_rows(stripe_count); std::size_t rows{0}; From 42601b2bcb281bbe7ed6f7d81fbc9628604ec9c9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 2 May 2024 14:50:33 -0700 Subject: [PATCH 321/321] Rename parameter Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl_chunking.cu | 8 ++++---- cpp/src/io/orc/reader_impl_chunking.hpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 1a682ccbeef..5034aa14a95 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -37,7 +37,7 @@ namespace cudf::io::orc::detail { std::size_t gather_stream_info_and_column_desc( - std::size_t stripe_order, + std::size_t stripe_id, std::size_t level, orc::StripeInformation const* stripeinfo, orc::StripeFooter const* stripefooter, @@ -93,7 +93,7 @@ std::size_t gather_stream_info_and_column_desc( if (child_idx >= 0) { col = child_idx; if (chunks) { - auto& chunk = (*chunks)[stripe_order][col]; + auto& chunk = (*chunks)[stripe_id][col]; chunk.strm_id[gpu::CI_PRESENT] = *local_stream_order; chunk.strm_len[gpu::CI_PRESENT] = stream.length; } @@ -105,7 +105,7 @@ std::size_t gather_stream_info_and_column_desc( if (src_offset >= stripeinfo->indexLength || use_index) { auto const index_type = get_stream_index_type(stream.kind); if (index_type < gpu::CI_NUM_STREAMS) { - auto& chunk = (*chunks)[stripe_order][col]; + auto& chunk = (*chunks)[stripe_id][col]; chunk.strm_id[index_type] = *local_stream_order; chunk.strm_len[index_type] = stream.length; // NOTE: skip_count field is temporarily used to track the presence of index streams @@ -126,7 +126,7 @@ 
             orc_stream_info{stripeinfo->offset + src_offset,
                             dst_offset,
                             stream.length,
-                            stream_source_info{stripe_order, level, column_id, stream.kind}});
+                            stream_source_info{stripe_id, level, column_id, stream.kind}});
       }
 
       dst_offset += stream.length;
diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp
index 0ba61004e10..4ef68ee8d86 100644
--- a/cpp/src/io/orc/reader_impl_chunking.hpp
+++ b/cpp/src/io/orc/reader_impl_chunking.hpp
@@ -290,7 +290,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
  * steps share most of the execution path thus this function takes mutually exclusive parameters
 * `stream_info` or `chunks` depending on each use case.
 *
- * @param stripe_order The index of the current stripe, can be global index or local decoding index
+ * @param stripe_id The index of the current stripe, can be global index or local decoding index
 * @param level The current processing nested level
 * @param stripeinfo The pointer to current stripe's information
 * @param stripefooter The pointer to current stripe's footer
@@ -305,7 +305,7 @@ std::vector<range> find_splits(host_span<T const> cumulative_sizes,
 std::size_t gather_stream_info_and_column_desc(
-  std::size_t stripe_order,
+  std::size_t stripe_id,
   std::size_t level,
   orc::StripeInformation const* stripeinfo,
   orc::StripeFooter const* stripefooter,