diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index cd3b7bf27da..3e63e8fc770 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -376,6 +376,162 @@ table_with_metadata read_parquet(
  * @{
  * @file
  */
+class table_input_metadata;
+
+class column_in_metadata {
+  friend table_input_metadata;
+  std::string _name = "";
+  thrust::optional<bool> _nullable;
+  // TODO: This isn't implemented yet
+  bool _list_column_is_map = false;
+  bool _use_int96_timestamp = false;
+  // bool _output_as_binary = false;
+  thrust::optional<uint8_t> _decimal_precision;
+  std::vector<column_in_metadata> children;
+
+ public:
+  /**
+   * @brief Set the name of this column
+   *
+   * @return this for chaining
+   */
+  column_in_metadata& set_name(std::string const& name)
+  {
+    _name = name;
+    return *this;
+  }
+
+  /**
+   * @brief Set the nullability of this column
+   *
+   * Only valid in case of chunked writes. In single writes, this option is ignored.
+   *
+   * @return this for chaining
+   */
+  column_in_metadata& set_nullability(bool nullable)
+  {
+    _nullable = nullable;
+    return *this;
+  }
+
+  /**
+   * @brief Specify that this list column should be encoded as a map in the written parquet file
+   *
+   * The column must have the structure list<struct<key, value>>. This option is invalid otherwise
+   *
+   * @return this for chaining
+   */
+  column_in_metadata& set_list_column_as_map()
+  {
+    _list_column_is_map = true;
+    return *this;
+  }
+
+  /**
+   * @brief Specifies whether this timestamp column should be encoded using the deprecated int96
+   * physical type. Only valid for the following column types:
+   * timestamp_s, timestamp_ms, timestamp_us, timestamp_ns
+   *
+   * @param req True = use int96 physical type. False = use int64 physical type
+   * @return this for chaining
+   */
+  column_in_metadata& set_int96_timestamps(bool req)
+  {
+    _use_int96_timestamp = req;
+    return *this;
+  }
+
+  /**
+   * @brief Set the decimal precision of this column. Only valid if this column is a decimal
+   * (fixed-point) type
+   *
+   * @param precision The integer precision to set for this decimal column
+   * @return this for chaining
+   */
+  column_in_metadata& set_decimal_precision(uint8_t precision)
+  {
+    _decimal_precision = precision;
+    return *this;
+  }
+
+  /**
+   * @brief Get reference to a child of this column
+   *
+   * @param i Index of the child to get
+   * @return Reference to the i-th child of this column
+   */
+  column_in_metadata& child(size_type i) { return children[i]; }
+
+  /**
+   * @brief Get const reference to a child of this column
+   *
+   * @param i Index of the child to get
+   * @return Const reference to the i-th child of this column
+   */
+  column_in_metadata const& child(size_type i) const { return children[i]; }
+
+  /**
+   * @brief Get the name of this column
+   */
+  std::string get_name() const { return _name; }
+
+  /**
+   * @brief Get whether nullability has been explicitly set for this column.
+   */
+  bool is_nullability_defined() const { return _nullable.has_value(); }
+
+  /**
+   * @brief Gets the explicitly set nullability for this column.
+   * @throws If nullability is not explicitly defined for this column.
+   *         Check using `is_nullability_defined()` first.
+   */
+  bool nullable() const { return _nullable.value(); }
+
+  /**
+   * @brief If this is the metadata of a list column, returns whether it is to be encoded as a map.
+   */
+  bool is_map() const { return _list_column_is_map; }
+
+  /**
+   * @brief Get whether to encode this timestamp column using deprecated int96 physical type
+   */
+  bool is_enabled_int96_timestamps() const { return _use_int96_timestamp; }
+
+  /**
+   * @brief Get whether precision has been set for this decimal column
+   */
+  bool is_decimal_precision_set() const { return _decimal_precision.has_value(); }
+
+  /**
+   * @brief Get the decimal precision that was set for this column.
+   * @throws If decimal precision was not set for this column.
+   *         Check using `is_decimal_precision_set()` first.
+   */
+  uint8_t get_decimal_precision() const { return _decimal_precision.value(); }
+
+  /**
+   * @brief Get the number of children of this column
+   */
+  size_type num_children() const { return children.size(); }
+};
+
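A minimal usage sketch of the per-column API above (illustrative only, not part of this patch; `tbl`, the output path, and the column layout are placeholder assumptions — a table whose first column is an integer id and whose second is a timestamp):

    #include <cudf/io/parquet.hpp>
    #include <cudf/table/table_view.hpp>

    void write_with_metadata(cudf::table_view const& tbl)
    {
      // table_input_metadata mirrors the table's structure; nested children are
      // reachable via column_metadata[i].child(j).
      cudf::io::table_input_metadata metadata(tbl, {{"created_by", "example"}});
      metadata.column_metadata[0].set_name("id").set_nullability(false);  // nullability only honored for chunked writes
      metadata.column_metadata[1].set_name("ts").set_int96_timestamps(true);

      auto opts =
        cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"out.parquet"}, tbl)
          .metadata(&metadata)
          .build();
      cudf::io::write_parquet(opts);
    }
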
+class table_input_metadata {
+ public:
+  table_input_metadata() = default;  // Required by cython
+
+  /**
+   * @brief Construct a new table_input_metadata from a table_view.
+   *
+   * The constructed table_input_metadata has the same structure as the passed table_view
+   *
+   * @param table The table_view to construct metadata for
+   * @param user_data Optional. Additional metadata to encode, as key-value pairs
+   */
+  table_input_metadata(table_view const& table, std::map<std::string, std::string> user_data = {});
+
+  std::vector<column_in_metadata> column_metadata;
+  std::map<std::string, std::string> user_data;  //!< Format-dependent metadata as key-value pairs
+};
 
 /**
  * @brief Class to build `parquet_writer_options`.
@@ -395,14 +551,12 @@ class parquet_writer_options {
   // Sets of columns to output
   table_view _table;
   // Optional associated metadata
-  const table_metadata* _metadata = nullptr;
-  // Parquet writes can write INT96 or TIMESTAMP_MICROS. Defaults to TIMESTAMP_MICROS.
+  table_input_metadata const* _metadata = nullptr;
+  // Parquet writer can write INT96 or TIMESTAMP_MICROS. Defaults to TIMESTAMP_MICROS.
+  // If true then overrides any per-column setting in _metadata.
   bool _write_timestamps_as_int96 = false;
   // Column chunks file path to be set in the raw output metadata
   std::string _column_chunks_file_path;
-  /// vector of precision values for decimal writing. Exactly one entry
-  /// per decimal column. Optional unless decimals are being written.
-  std::vector<uint8_t> _decimal_precision;
 
   /**
    * @brief Constructor from sink and table.
@@ -465,7 +619,7 @@ class parquet_writer_options {
   /**
    * @brief Returns associated metadata.
    */
-  table_metadata const* get_metadata() const { return _metadata; }
+  table_input_metadata const* get_metadata() const { return _metadata; }
 
   /**
    * @brief Returns `true` if timestamps will be written as INT96
@@ -477,17 +631,12 @@ class parquet_writer_options {
    */
   std::string get_column_chunks_file_path() const { return _column_chunks_file_path; }
 
-  /**
-   * @brief Returns a constant reference to the decimal precision vector.
-   */
-  std::vector<uint8_t> const& get_decimal_precision() const { return _decimal_precision; }
-
   /**
    * @brief Sets metadata.
    *
    * @param metadata Associated metadata.
    */
-  void set_metadata(table_metadata const* metadata) { _metadata = metadata; }
+  void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; }
 
   /**
    * @brief Sets the level of statistics.
@@ -520,11 +669,6 @@ class parquet_writer_options {
   {
     _column_chunks_file_path.assign(file_path);
   }
-
-  /**
-   * @brief Sets the decimal precision vector data.
- */ - void set_decimal_precision(std::vector dp) { _decimal_precision = std::move(dp); } }; class parquet_writer_options_builder { @@ -555,7 +699,7 @@ class parquet_writer_options_builder { * @param metadata Associated metadata. * @return this for chaining. */ - parquet_writer_options_builder& metadata(table_metadata const* metadata) + parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { options._metadata = metadata; return *this; @@ -672,11 +816,10 @@ class chunked_parquet_writer_options { // Specify the level of statistics in the output file statistics_freq _stats_level = statistics_freq::STATISTICS_ROWGROUP; // Optional associated metadata. - const table_metadata_with_nullability* _nullable_metadata = nullptr; - // Parquet writes can write INT96 or TIMESTAMP_MICROS. Defaults to TIMESTAMP_MICROS. + table_input_metadata const* _metadata = nullptr; + // Parquet writer can write INT96 or TIMESTAMP_MICROS. Defaults to TIMESTAMP_MICROS. + // If true then overrides any per-column setting in _metadata. bool _write_timestamps_as_int96 = false; - // Optional decimal precision data - must be present if writing decimals - std::vector _decimal_precision = {}; /** * @brief Constructor from sink. @@ -711,17 +854,9 @@ class chunked_parquet_writer_options { statistics_freq get_stats_level() const { return _stats_level; } /** - * @brief Returns nullable metadata information. + * @brief Returns metadata information. */ - const table_metadata_with_nullability* get_nullable_metadata() const - { - return _nullable_metadata; - } - - /** - * @brief Returns decimal precision pointer. - */ - std::vector const& get_decimal_precision() const { return _decimal_precision; } + table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns `true` if timestamps will be written as INT96 @@ -729,22 +864,11 @@ class chunked_parquet_writer_options { bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } /** - * @brief Sets nullable metadata. + * @brief Sets metadata. * * @param metadata Associated metadata. */ - void set_nullable_metadata(const table_metadata_with_nullability* metadata) - { - _nullable_metadata = metadata; - } - - /** - * @brief Sets decimal precision data. - * - * @param v Vector of precision data flattened with exactly one entry per - * decimal column. - */ - void set_decimal_precision_data(std::vector const& v) { _decimal_precision = v; } + void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; } /** * @brief Sets the level of statistics in parquet_writer_options. @@ -797,15 +921,14 @@ class chunked_parquet_writer_options_builder { chunked_parquet_writer_options_builder(sink_info const& sink) : options(sink){}; /** - * @brief Sets nullable metadata to chunked_parquet_writer_options. + * @brief Sets metadata to chunked_parquet_writer_options. * * @param metadata Associated metadata. * @return this for chaining. */ - chunked_parquet_writer_options_builder& nullable_metadata( - const table_metadata_with_nullability* metadata) + chunked_parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { - options._nullable_metadata = metadata; + options._metadata = metadata; return *this; } @@ -821,18 +944,6 @@ class chunked_parquet_writer_options_builder { return *this; } - /** - * @brief Sets decimal precision data. - * - * @param v Vector of precision data flattened with exactly one entry per - * decimal column. 
-   */
-  chunked_parquet_writer_options_builder& decimal_precision(std::vector<uint8_t> const& v)
-  {
-    options._decimal_precision = v;
-    return *this;
-  }
-
   /**
    * @brief Sets compression type to chunked_parquet_writer_options.
    *
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 1b7635f8d0d..bc6d36a0328 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -419,6 +419,22 @@ std::unique_ptr<std::vector<uint8_t>> merge_rowgroup_metadata(
   return detail_parquet::writer::merge_rowgroup_metadata(metadata_list);
 }
 
+table_input_metadata::table_input_metadata(table_view const& table,
+                                           std::map<std::string, std::string> user_data)
+  : user_data{std::move(user_data)}
+{
+  // Create a metadata hierarchy using `table`
+  std::function<column_in_metadata(column_view const&)> get_children =
+    [&](column_view const& col) {
+      auto col_meta = column_in_metadata{};
+      std::transform(
+        col.child_begin(), col.child_end(), std::back_inserter(col_meta.children), get_children);
+      return col_meta;
+    };
+
+  std::transform(
+    table.begin(), table.end(), std::back_inserter(this->column_metadata), get_children);
+}
+
 /**
  * @copydoc cudf::io::write_parquet
  */
diff --git a/cpp/src/io/parquet/page_dict.cu b/cpp/src/io/parquet/page_dict.cu
index d984cc1e44f..46d471d5cf7 100644
--- a/cpp/src/io/parquet/page_dict.cu
+++ b/cpp/src/io/parquet/page_dict.cu
@@ -36,7 +36,7 @@ struct dict_state_s {
   uint32_t num_dict_entries;  //!< Dictionary entries in current fragment to add
   uint32_t frag_dict_size;
   EncColumnChunk ck;
-  EncColumnDesc col;
+  parquet_column_device_view col;
   PageFragment frag;
   volatile uint32_t scratch_red[32];
   uint16_t frag_dict[max_page_fragment_size];
diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 8b99248e2fd..3b29394686f 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -44,7 +44,7 @@ constexpr int init_hash_bits = 12;
 constexpr uint32_t rle_buffer_size = (1 << 9);
 
 struct frag_init_state_s {
-  EncColumnDesc col;
+  parquet_column_device_view col;
   PageFragment frag;
   uint32_t total_dupes;
   size_type start_value_idx;
@@ -70,7 +70,7 @@ struct page_enc_state_s {
   volatile uint32_t scratch_red[32];
   EncPage page;
   EncColumnChunk ck;
-  EncColumnDesc col;
+  parquet_column_device_view col;
   gpu_inflate_input_s comp_in;
   gpu_inflate_status_s comp_out;
   uint16_t vals[rle_buffer_size];
@@ -111,12 +111,13 @@ inline __device__ uint32_t uint64_init_hash(uint64_t v)
  */
 // blockDim {512,1,1}
 template <int block_size>
-__global__ void __launch_bounds__(block_size) gpuInitPageFragments(PageFragment *frag,
-                                                                   const EncColumnDesc *col_desc,
-                                                                   int32_t num_fragments,
-                                                                   int32_t num_columns,
-                                                                   uint32_t fragment_size,
-                                                                   uint32_t max_num_rows)
+__global__ void __launch_bounds__(block_size)
+  gpuInitPageFragments(PageFragment *frag,
+                       const parquet_column_device_view *col_desc,
+                       int32_t num_fragments,
+                       int32_t num_columns,
+                       uint32_t fragment_size,
+                       uint32_t max_num_rows)
 {
   __shared__ __align__(16) frag_init_state_s state_g;
 
@@ -158,12 +159,18 @@ __global__ void __launch_bounds__(block_size) gpuInitPageFragments(PageFragment
     } else {
       auto col                     = *(s->col.parent_column);
       auto current_start_value_idx = start_row;
-      while (col.type().id() == type_id::LIST) {
-        auto offset_col = col.child(lists_column_view::offsets_column_index);
-        current_start_value_idx =
-          offset_col.element<size_type>(current_start_value_idx + col.offset());
-        end_value_idx = offset_col.element<size_type>(end_value_idx + col.offset());
-        col           = col.child(lists_column_view::child_column_index);
+      while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) {
+        if (col.type().id() ==
type_id::STRUCT) { + current_start_value_idx += col.offset(); + end_value_idx += col.offset(); + col = col.child(0); + } else { + auto offset_col = col.child(lists_column_view::offsets_column_index); + current_start_value_idx = + offset_col.element(current_start_value_idx + col.offset()); + end_value_idx = offset_col.element(end_value_idx + col.offset()); + col = col.child(lists_column_view::child_column_index); + } } s->start_value_idx = current_start_value_idx; } @@ -372,12 +379,13 @@ __global__ void __launch_bounds__(block_size) gpuInitPageFragments(PageFragment } // blockDim {128,1,1} -__global__ void __launch_bounds__(128) gpuInitFragmentStats(statistics_group *groups, - const PageFragment *fragments, - const EncColumnDesc *col_desc, - int32_t num_fragments, - int32_t num_columns, - uint32_t fragment_size) +__global__ void __launch_bounds__(128) + gpuInitFragmentStats(statistics_group *groups, + const PageFragment *fragments, + const parquet_column_device_view *col_desc, + int32_t num_fragments, + int32_t num_columns, + uint32_t fragment_size) { __shared__ __align__(8) statistics_group group_g[4]; @@ -397,13 +405,13 @@ __global__ void __launch_bounds__(128) gpuInitFragmentStats(statistics_group *gr // blockDim {128,1,1} __global__ void __launch_bounds__(128) gpuInitPages(EncColumnChunk *chunks, EncPage *pages, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, statistics_merge_group *page_grstats, statistics_merge_group *chunk_grstats, int32_t num_rowgroups, int32_t num_columns) { - __shared__ __align__(8) EncColumnDesc col_g; + __shared__ __align__(8) parquet_column_device_view col_g; __shared__ __align__(8) EncColumnChunk ck_g; __shared__ __align__(8) PageFragment frag_g; __shared__ __align__(8) EncPage page_g; @@ -541,8 +549,8 @@ __global__ void __launch_bounds__(128) gpuInitPages(EncColumnChunk *chunks, page_g.num_rows = rows_in_page; page_g.num_leaf_values = leaf_values_in_page; page_g.num_values = values_in_page; - uint32_t def_level_bits = col_g.level_bits & 0xf; - uint32_t rep_level_bits = col_g.level_bits >> 4; + uint32_t def_level_bits = col_g.num_def_level_bits(); + uint32_t rep_level_bits = col_g.num_rep_level_bits(); // Run length = 4, max(rle/bitpack header) = 5, add one byte per 256 values for overhead // TODO (dm): Improve readability of these calculations. uint32_t def_level_size = @@ -936,10 +944,12 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, __syncthreads(); // Encode Repetition and Definition levels - if (s->page.page_type != PageType::DICTIONARY_PAGE && s->col.level_bits != 0 && - s->col.parent_column == nullptr) { + if (s->page.page_type != PageType::DICTIONARY_PAGE && + (s->col.num_def_level_bits()) != 0 && // This means max definition level is not 0 (nullable) + (s->col.num_rep_level_bits()) == 0 // This means there are no repetition levels (non-list) + ) { // Calculate definition levels from validity - uint32_t def_lvl_bits = s->col.level_bits & 0xf; + uint32_t def_lvl_bits = s->col.num_def_level_bits(); if (def_lvl_bits != 0) { if (!t) { s->rle_run = 0; @@ -954,9 +964,32 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, uint32_t row = s->page.start_row + rle_numvals + t; // Definition level encodes validity. Checks the valid map and if it is valid, then sets the // def_lvl accordingly and sets it in s->vals which is then given to RleEncode to encode - uint32_t def_lvl = (rle_numvals + t < s->page.num_rows && row < s->col.num_rows) - ? 
s->col.leaf_column->is_valid(row) - : 0; + uint32_t def_lvl = [&]() { + bool within_bounds = rle_numvals + t < s->page.num_rows && row < s->col.num_rows; + if (not within_bounds) { return 0u; } + uint32_t def = 0; + size_type l = 0; + bool is_col_struct = false; + auto col = *s->col.parent_column; + do { + // If col not nullable then it does not contribute to def levels + if (s->col.nullability[l]) { + if (col.is_valid(row)) { + ++def; + } else { + // We have found the shallowest level at which this row is null + break; + } + } + is_col_struct = (col.type().id() == type_id::STRUCT); + if (is_col_struct) { + row += col.offset(); + col = col.child(0); + ++l; + } + } while (is_col_struct); + return def; + }(); s->vals[(rle_numvals + t) & (rle_buffer_size - 1)] = def_lvl; __syncthreads(); rle_numvals += nrows; @@ -974,7 +1007,9 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, if (t == 0) { s->cur = rle_out; } } } - } else if (s->page.page_type != PageType::DICTIONARY_PAGE && s->col.parent_column != nullptr) { + } else if (s->page.page_type != PageType::DICTIONARY_PAGE && + s->col.num_rep_level_bits() != 0 // This means there ARE repetition levels (has list) + ) { auto encode_levels = [&](uint8_t const *lvl_val_data, uint32_t nbits) { // For list types, the repetition and definition levels are pre-calculated. We just need to // encode and write them now. @@ -1010,9 +1045,9 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, if (t == 0) { s->cur = rle_out; } } }; - encode_levels(s->col.rep_values, s->col.level_bits >> 4); + encode_levels(s->col.rep_values, s->col.num_rep_level_bits()); __syncthreads(); - encode_levels(s->col.def_values, s->col.level_bits & 0xf); + encode_levels(s->col.def_values, s->col.num_def_level_bits()); } // Encode data values __syncthreads(); @@ -1041,10 +1076,15 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, if (s->col.parent_column != nullptr) { auto col = *(s->col.parent_column); auto current_page_start_val = s->page_start_val; - while (col.type().id() == type_id::LIST) { - current_page_start_val = col.child(lists_column_view::offsets_column_index) - .element(current_page_start_val + col.offset()); - col = col.child(lists_column_view::child_column_index); + while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { + if (col.type().id() == type_id::STRUCT) { + current_page_start_val += col.offset(); + col = col.child(0); + } else { + current_page_start_val = col.child(lists_column_view::offsets_column_index) + .element(current_page_start_val + col.offset()); + col = col.child(lists_column_view::child_column_index); + } } s->page_start_val = current_page_start_val; } @@ -1156,11 +1196,13 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, auto const ret = convert_nanoseconds([&]() { using namespace cuda::std::chrono; - switch (s->col.converted_type) { - case TIMESTAMP_MILLIS: { + switch (s->col.leaf_column->type().id()) { + case type_id::TIMESTAMP_SECONDS: + case type_id::TIMESTAMP_MILLISECONDS: { return sys_time{milliseconds{v}}; } break; - case TIMESTAMP_MICROS: { + case type_id::TIMESTAMP_MICROSECONDS: + case type_id::TIMESTAMP_NANOSECONDS: { return sys_time{microseconds{v}}; } break; } @@ -1383,7 +1425,7 @@ class header_encoder { __device__ uint8_t *EncodeStatistics(uint8_t *start, const statistics_chunk *s, - const EncColumnDesc *col, + const parquet_column_device_view *col, float *fp_scratch) { uint8_t *end, dtype, dtype_len; @@ -1441,7 
+1483,7 @@ __global__ void __launch_bounds__(128) gpuEncodePageHeaders(EncPage *pages,
                                                              const statistics_chunk *chunk_stats,
                                                              uint32_t start_page)
 {
-  __shared__ __align__(8) EncColumnDesc col_g;
+  __shared__ __align__(8) parquet_column_device_view col_g;
   __shared__ __align__(8) EncColumnChunk ck_g;
   __shared__ __align__(8) EncPage page_g;
   __shared__ __align__(8) float fp_scratch[2];
@@ -1567,6 +1609,42 @@ __global__ void __launch_bounds__(1024) gpuGatherPages(EncColumnChunk *chunks, c
   }
 }
 
+/**
+ * @brief Functor to get definition level value for a nested struct column until the leaf level or
+ * the first list level.
+ *
+ */
+struct def_level_fn {
+  column_device_view const *parent_col;
+  uint8_t const *d_nullability;
+  uint8_t sub_level_start;
+  uint8_t curr_def_level;
+
+  __device__ uint32_t operator()(size_type i)
+  {
+    uint32_t def       = curr_def_level;
+    uint8_t l          = sub_level_start;
+    bool is_col_struct = false;
+    auto col           = *parent_col;
+    do {
+      // If col not nullable then it does not contribute to def levels
+      if (d_nullability[l]) {
+        if (not col.nullable() or bit_is_set(col.null_mask(), i)) {
+          ++def;
+        } else {  // We have found the shallowest level at which this row is null
+          break;
+        }
+      }
+      is_col_struct = (col.type().id() == type_id::STRUCT);
+      if (is_col_struct) {
+        col = col.child(0);
+        ++l;
+      }
+    } while (is_col_struct);
+    return def;
+  }
+};
+
 /**
  * @brief Get the dremel offsets and repetition and definition levels for a LIST column
  *
@@ -1633,16 +1711,53 @@ __global__ void __launch_bounds__(1024) gpuGatherPages(EncColumnChunk *chunks, c
  * ```
  *
  * Similarly we merge up all the way till level 0 offsets
+ *
+ * STRUCT COLUMNS :
+ * In case of struct columns, we don't have to merge struct levels with their children because a
+ * struct is the same size as its children. e.g. for a column `struct<int, float>`, if the row `i`
+ * is null, then the children columns `int` and `float` are also null at `i`. They also have the
+ * null entry represented in their respective null masks. So for any case of strictly struct based
+ * nesting, we can get the definition levels merely by iterating over the nesting for the same row.
+ *
+ * In case struct and lists are intermixed, the definition levels of all the contiguous struct
+ * levels can be constructed using the aforementioned iterative method. Only when we reach a list
+ * level, we need to do a merge with the subsequent level.
+ *
+ * So, for a column like `struct<list<int>>`, we are going to merge between the levels `struct<list`
+ * and `int`.
+ * For a column like `list<struct<int>>`, we are going to merge between `list` and `struct<int>`.
+ *
+ * In general, one nesting level is the list level and any struct level that precedes it.
+ *
+ * A few more examples to visualize the partitioning of column hierarchy into nesting levels:
+ * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted)
+ * ```
+ * 1. LSi     = L   Si
+ *              - | --
+ *
+ * 2. LLSi    = L   L   Si
+ *              - | - | --
+ *
+ * 3. SSLi    = SSL   i
+ *              --- | -
+ *
+ * 4. LLSLSSi = L   L   SL   SSi
+ *              - | - | -- | ---
+ * ```
  */
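The definition-level rule that `def_level_fn` applies can be sanity-checked off-device. A standalone host sketch (plain C++, not part of this patch; `level_info` and `definition_level` are hypothetical names): every nullable level that is valid at a row adds one to the row's definition level, non-nullable levels add nothing, and the walk stops at the shallowest null level.

    #include <cstdint>
    #include <vector>

    // One entry per contiguous struct level, shallowest first.
    struct level_info {
      bool nullable;  // does this level allow nulls
      bool valid;     // is this row non-null at this level
    };

    uint32_t definition_level(std::vector<level_info> const &levels, uint32_t base_def = 0)
    {
      uint32_t def = base_def;
      for (auto const &lvl : levels) {
        if (lvl.nullable) {
          if (lvl.valid) {
            ++def;   // nullable and present: contributes one def level
          } else {
            break;   // shallowest null: deeper levels are not represented
          }
        }
        // non-nullable levels contribute nothing; keep descending
      }
      return def;
    }

    // e.g. struct<struct<int>> with all three levels nullable: a row valid
    // everywhere yields def level base+3; null at the inner struct yields base+2.
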
 dremel_data get_dremel_data(column_view h_col,
-                            std::vector<bool> const &level_nullability,
+                            // TODO(cp): use device_span once it is converted to a single hd_vec
+                            rmm::device_uvector<uint8_t> const &d_nullability,
+                            std::vector<bool> const &nullability,
                             rmm::cuda_stream_view stream)
 {
-  CUDF_EXPECTS(h_col.type().id() == type_id::LIST,
-               "Can only get rep/def levels for LIST type column");
+  auto get_list_level = [](column_view col) {
+    while (col.type().id() == type_id::STRUCT) { col = col.child(0); }
+    return col;
+  };
+
   auto get_empties = [&](column_view col, size_type start, size_type end) {
-    auto lcv = lists_column_view(col);
+    auto lcv = lists_column_view(get_list_level(col));
     rmm::device_uvector<size_type> empties_idx(lcv.size(), stream);
     rmm::device_uvector<size_type> empties(lcv.size(), stream);
     auto d_off = lcv.offsets().data<size_type>();
@@ -1663,38 +1778,60 @@ dremel_data get_dremel_data(column_view h_col,
     return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size);
   };
 
-  // Reverse the nesting in order to merge the deepest level with the leaf first and merge bottom
-  // up
-  auto curr_col        = h_col;
-  size_t max_vals_size = 0;
+  auto curr_col = h_col;
   std::vector<column_view> nesting_levels;
   std::vector<uint8_t> def_at_level;
-  size_type level = 0;
-  auto add_def_at_level = [&](size_type level) {
-    auto is_level_nullable =
-      curr_col.nullable() or (not level_nullability.empty() and level_nullability[level]);
-    def_at_level.push_back(is_level_nullable ? 2 : 1);
+  std::vector<uint8_t> start_at_sub_level;
+  uint8_t curr_nesting_level_idx = 0;
+
+  auto add_def_at_level = [&](column_view col) {
+    // Add up all def level contributions in this column all the way till the first list column
+    // appears in the hierarchy or until we get to leaf
+    uint32_t def = 0;
+    start_at_sub_level.push_back(curr_nesting_level_idx);
+    while (col.type().id() == type_id::STRUCT) {
+      def += (nullability[curr_nesting_level_idx]) ? 1 : 0;
+      col = col.child(0);
+      ++curr_nesting_level_idx;
+    }
+    // At the end of all those structs is either a list column or the leaf. Leaf column contributes
+    // at least one def level. It doesn't matter what the leaf contributes because it'll be at the
+    // end of the exclusive scan.
+    def += (nullability[curr_nesting_level_idx]) ? 2 : 1;
+    def_at_level.push_back(def);
+    ++curr_nesting_level_idx;
   };
-  while (curr_col.type().id() == type_id::LIST) {
+  while (cudf::is_nested(curr_col.type())) {
     nesting_levels.push_back(curr_col);
-    add_def_at_level(level);
-    auto lcv = lists_column_view(curr_col);
-    max_vals_size += lcv.offsets().size();
-    curr_col = lcv.child();
-    level++;
+    add_def_at_level(curr_col);
+    while (curr_col.type().id() == type_id::STRUCT) {
+      // Go down the hierarchy until we get to the LIST or the leaf level
+      curr_col = curr_col.child(0);
+    }
+    if (curr_col.type().id() == type_id::LIST) {
+      curr_col = curr_col.child(lists_column_view::child_column_index);
+      if (not is_nested(curr_col.type())) {
+        // Special case: when the leaf data column is the immediate child of the list col then we
+        // want it to be included right away. Otherwise the struct containing it will be included in
+        // the next iteration of this loop.
+ nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + break; + } + } } - // One more entry for leaf col - add_def_at_level(level); - max_vals_size += curr_col.size(); - // Add one more value at the end so that we can have the max def level - def_at_level.push_back(0); + std::unique_ptr device_view_owners; + column_device_view *d_nesting_levels; + std::tie(device_view_owners, d_nesting_levels) = + contiguous_copy_column_device_views(nesting_levels, stream); + thrust::exclusive_scan( thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); // Sliced list column views only have offsets applied to top level. Get offsets for each level. - rmm::device_uvector d_column_offsets(nesting_levels.size() + 1, stream); - rmm::device_uvector d_column_ends(nesting_levels.size() + 1, stream); + rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); + rmm::device_uvector d_column_ends(nesting_levels.size(), stream); auto d_col = column_device_view::create(h_col, stream); cudf::detail::device_single_thread( @@ -1709,24 +1846,29 @@ dremel_data get_dremel_data(column_view h_col, end_idx_at_level[level] = end; ++level; // Apply offset recursively until we get to leaf data - while (curr_col.type().id() == type_id::LIST) { - off = curr_col.child(lists_column_view::offsets_column_index).element(off); - end = curr_col.child(lists_column_view::offsets_column_index).element(end); - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - curr_col = curr_col.child(lists_column_view::child_column_index); + // Skip doing the following for any structs we encounter in between. + while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { + if (curr_col.type().id() == type_id::LIST) { + off = curr_col.child(lists_column_view::offsets_column_index).element(off); + end = curr_col.child(lists_column_view::offsets_column_index).element(end); + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + curr_col = curr_col.child(lists_column_view::child_column_index); + } else { + curr_col = curr_col.child(0); + } } }, stream); - thrust::host_vector column_offsets(nesting_levels.size() + 1); + thrust::host_vector column_offsets(d_column_offsets.size()); CUDA_TRY(cudaMemcpyAsync(column_offsets.data(), d_column_offsets.data(), d_column_offsets.size() * sizeof(size_type), cudaMemcpyDeviceToHost, stream.value())); - thrust::host_vector column_ends(nesting_levels.size() + 1); + thrust::host_vector column_ends(d_column_ends.size()); CUDA_TRY(cudaMemcpyAsync(column_ends.data(), d_column_ends.data(), d_column_ends.size() * sizeof(size_type), @@ -1735,6 +1877,11 @@ dremel_data get_dremel_data(column_view h_col, stream.synchronize(); + size_t max_vals_size = 0; + for (size_t l = 0; l < column_offsets.size(); ++l) { + max_vals_size += column_ends[l] - column_offsets[l]; + } + rmm::device_uvector rep_level(max_vals_size, stream); rmm::device_uvector def_level(max_vals_size, stream); @@ -1745,9 +1892,13 @@ dremel_data get_dremel_data(column_view h_col, { // At this point, curr_col contains the leaf column. Max nesting level is // nesting_levels.size(). - size_t level = nesting_levels.size() - 1; + + // We are going to start by merging the last column in nesting_levels (the leaf, which is at the + // index `nesting_levels.size() - 1`) with the second-to-last (which is at + // `nesting_levels.size() - 2`). 
+ size_t level = nesting_levels.size() - 2; curr_col = nesting_levels[level]; - auto lcv = lists_column_view(curr_col); + auto lcv = lists_column_view(get_list_level(curr_col)); auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; // Get empties at this level @@ -1760,25 +1911,21 @@ dremel_data get_dremel_data(column_view h_col, // Merge empty at deepest parent level with the rep, def level vals at leaf level auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [idx = empties_idx.data(), - mask = lcv.null_mask(), - level_nullable = level_nullability.empty() ? false : level_nullability[level], - curr_def_level = def_at_level[level]] __device__(auto i) { - return curr_def_level + - ((mask && bit_is_set(mask, idx[i]) or (!mask && level_nullable)) ? 1 : 0); - }); - - auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size()); - auto input_child_def_it = thrust::make_transform_iterator( - thrust::make_counting_iterator(column_offsets[level + 1]), - [mask = lcv.child().null_mask(), - level_nullable = level_nullability.empty() ? false : level_nullability[level + 1], - curr_def_level = def_at_level[level + 1]] __device__(auto i) { - return curr_def_level + - ((mask && bit_is_set(mask, i) or (!mask && level_nullable)) ? 1 : 0); - }); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels + auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); + auto input_child_def_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), + def_level_fn{d_nesting_levels + level + 1, + d_nullability.data(), + start_at_sub_level[level + 1], + def_at_level[level + 1]}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -1831,9 +1978,11 @@ dremel_data get_dremel_data(column_view h_col, rep_level.begin()); } - for (int level = nesting_levels.size() - 2; level >= 0; level--) { + // Having already merged the last two levels, we are now going to merge the result with the + // third-last level which is at index `nesting_levels.size() - 3`. + for (int level = nesting_levels.size() - 3; level >= 0; level--) { curr_col = nesting_levels[level]; - auto lcv = lists_column_view(curr_col); + auto lcv = lists_column_view(get_list_level(curr_col)); auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; // Get empties at this level @@ -1857,15 +2006,12 @@ dremel_data get_dremel_data(column_view h_col, auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [idx = empties_idx.data(), - mask = lcv.null_mask(), - level_nullable = level_nullability.empty() ? false : level_nullability[level], - curr_def_level = def_at_level[level]] __device__(auto i) { - return curr_def_level + - ((mask && bit_is_set(mask, idx[i]) or (!mask && level_nullable)) ? 
1 : 0); - }); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -1927,16 +2073,10 @@ dremel_data get_dremel_data(column_view h_col, stream.synchronize(); - size_type leaf_col_offset = column_offsets[column_offsets.size() - 1]; - size_type leaf_data_size = column_ends[column_ends.size() - 1] - leaf_col_offset; - uint8_t max_def_level = def_at_level.back() - 1; + size_type leaf_data_size = column_ends.back() - column_offsets.back(); - return dremel_data{std::move(new_offsets), - std::move(rep_level), - std::move(def_level), - leaf_col_offset, - leaf_data_size, - max_def_level}; + return dremel_data{ + std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; } /** @@ -1949,7 +2089,7 @@ dremel_data get_dremel_data(column_view h_col, * @param[in] stream CUDA stream to use, default 0 */ void InitPageFragments(PageFragment *frag, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_fragments, int32_t num_columns, uint32_t fragment_size, @@ -1974,7 +2114,7 @@ void InitPageFragments(PageFragment *frag, */ void InitFragmentStatistics(statistics_group *groups, const PageFragment *fragments, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_fragments, int32_t num_columns, uint32_t fragment_size, @@ -1999,7 +2139,7 @@ void InitFragmentStatistics(statistics_group *groups, */ void InitEncoderPages(EncColumnChunk *chunks, EncPage *pages, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_rowgroups, int32_t num_columns, statistics_merge_group *page_grstats, diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 43d144ec980..f9415cf8cc0 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -215,7 +215,7 @@ struct ColumnChunkDesc { /** * @brief Struct describing an encoder column */ -struct EncColumnDesc : stats_column_desc { +struct parquet_column_device_view : stats_column_desc { uint32_t *dict_index; //!< Dictionary index [row] uint32_t *dict_data; //!< Dictionary data (unique row indices) uint8_t physical_type; //!< physical data type @@ -223,12 +223,17 @@ struct EncColumnDesc : stats_column_desc { // TODO (dm): Evaluate if this is sufficient. At 4 bits, this allows a maximum 16 level nesting uint8_t level_bits; //!< bits to encode max definition (lower nibble) & repetition (upper nibble) //!< levels + constexpr uint8_t num_def_level_bits() { return level_bits & 0xf; } + constexpr uint8_t num_rep_level_bits() { return level_bits >> 4; } size_type const *const *nesting_offsets; //!< If column is a nested type, contains offset array of each nesting level size_type const *level_offsets; //!< Offset array for per-row pre-calculated rep/def level values uint8_t const *rep_values; //!< Pre-calculated repetition level values uint8_t const *def_values; //!< Pre-calculated definition level values + uint8_t *nullability; //!< Array of nullability of each nesting level. e.g. nullable[0] is + //!< nullability of parent_column. May be different from col.nullable() in + //!< case of chunked writing. 
}; constexpr int max_page_fragment_size = 5000; //!< Max number of rows in a page fragment @@ -299,15 +304,15 @@ inline size_t __device__ __host__ GetMaxCompressedBfrSize(size_t uncomp_size, * @brief Struct describing an encoder column chunk */ struct EncColumnChunk { - const EncColumnDesc *col_desc; //!< Column description - PageFragment *fragments; //!< First fragment in chunk - uint8_t *uncompressed_bfr; //!< Uncompressed page data - uint8_t *compressed_bfr; //!< Compressed page data - const statistics_chunk *stats; //!< Fragment statistics - uint32_t bfr_size; //!< Uncompressed buffer size - uint32_t compressed_size; //!< Compressed buffer size - uint32_t start_row; //!< First row of chunk - uint32_t num_rows; //!< Number of rows in chunk + const parquet_column_device_view *col_desc; //!< Column description + PageFragment *fragments; //!< First fragment in chunk + uint8_t *uncompressed_bfr; //!< Uncompressed page data + uint8_t *compressed_bfr; //!< Compressed page data + const statistics_chunk *stats; //!< Fragment statistics + uint32_t bfr_size; //!< Uncompressed buffer size + uint32_t compressed_size; //!< Compressed buffer size + uint32_t start_row; //!< First row of chunk + uint32_t num_rows; //!< Number of rows in chunk uint32_t num_values; //!< Number of values in chunk. Different from num_rows for nested types uint32_t first_fragment; //!< First fragment of chunk uint32_t first_page; //!< First page of chunk @@ -398,9 +403,7 @@ struct dremel_data { rmm::device_uvector rep_level; rmm::device_uvector def_level; - size_type leaf_col_offset; size_type leaf_data_size; - uint8_t max_def_level; }; /** @@ -423,8 +426,9 @@ struct dremel_data { * @return A struct containing dremel data */ dremel_data get_dremel_data(column_view h_col, - std::vector const &level_nullability = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::device_uvector const &d_nullability, + std::vector const &nullability, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** * @brief Launches kernel for initializing encoder page fragments @@ -438,7 +442,7 @@ dremel_data get_dremel_data(column_view h_col, * @param[in] stream CUDA stream to use, default 0 */ void InitPageFragments(PageFragment *frag, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_fragments, int32_t num_columns, uint32_t fragment_size, @@ -458,7 +462,7 @@ void InitPageFragments(PageFragment *frag, */ void InitFragmentStatistics(statistics_group *groups, const PageFragment *fragments, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_fragments, int32_t num_columns, uint32_t fragment_size, @@ -478,7 +482,7 @@ void InitFragmentStatistics(statistics_group *groups, */ void InitEncoderPages(EncColumnChunk *chunks, EncPage *pages, - const EncColumnDesc *col_desc, + const parquet_column_device_view *col_desc, int32_t num_rowgroups, int32_t num_columns, statistics_merge_group *page_grstats = nullptr, diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index a645ca0fd91..3dbcd5e1655 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -70,364 +70,453 @@ parquet::Compression to_parquet_compression(compression_type compression) } } -std::vector> get_per_column_nullability(table_view const &table, - std::vector const &col_nullable) -{ - auto get_depth = [](column_view const &col) { - column_view curr_col = col; - uint16_t depth = 1; - while (curr_col.type().id() == type_id::LIST) { - 
depth++;
-      curr_col = lists_column_view{curr_col}.child();
+}  // namespace
+
+struct linked_column_view;
+
+using LinkedColPtr    = std::shared_ptr<linked_column_view>;
+using LinkedColVector = std::vector<LinkedColPtr>;
+
+/**
+ * @brief column_view with the added member pointer to the parent of this column.
+ *
+ */
+struct linked_column_view : public column_view {
+  // TODO(cp): we are currently keeping all column_view children info multiple times - once for each
+  // copy of this object. Options:
+  // 1. Inherit from column_view_base. Only lose out on children vector. That is not needed.
+  // 2. Don't inherit at all. make linked_column_view keep a reference wrapper to its column_view
+  linked_column_view(column_view const &col) : column_view(col), parent(nullptr)
+  {
+    for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) {
+      children.push_back(std::make_shared<linked_column_view>(this, *child_it));
     }
-    return depth;
-  };
+  }
 
-  // for each column, check depth and add subsequent bool values to its nullable vector
-  std::vector<std::vector<bool>> per_column_nullability;
-  auto null_it  = col_nullable.begin();
-  auto const_it = thrust::make_constant_iterator(true);
-  for (auto const &col : table) {
-    uint16_t depth = get_depth(col);
-    if (col_nullable.empty()) {
-      // If no per-column nullability is specified then assume that all columns are nullable
-      per_column_nullability.emplace_back(const_it, const_it + depth);
-    } else {
-      CUDF_EXPECTS(
-        null_it + depth <= col_nullable.end(),
-        "Mismatch between size of column nullability passed in user_metadata_with_nullability and "
-        "number of null masks expected in table. Expected more values in passed metadata");
-      per_column_nullability.emplace_back(null_it, null_it + depth);
-      null_it += depth;
+  linked_column_view(linked_column_view *parent, column_view const &col)
+    : column_view(col), parent(parent)
+  {
+    for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) {
+      children.push_back(std::make_shared<linked_column_view>(this, *child_it));
+    }
   }
-  CUDF_EXPECTS(
-    null_it == col_nullable.end(),
-    "Mismatch between size of column nullability passed in user_metadata_with_nullability and "
-    "number of null masks expected in table. Too many values in passed metadata");
-  return per_column_nullability;
-}
+
+  linked_column_view *parent;  //!< Pointer to parent of this column. Nullptr if root
+  LinkedColVector children;
+};
 
 /**
- * @brief Get the leaf column
+ * @brief Converts all column_views of a table into linked_column_views
  *
- * Returns the dtype of the leaf column when `col` is a list column.
+ * @param table table of columns to convert
+ * @return Vector of converted linked_column_views
  */
-column_view get_leaf_col(column_view col)
+LinkedColVector input_table_to_linked_columns(table_view const &table)
 {
-  column_view curr_col = col;
-  while (curr_col.type().id() == type_id::LIST) { curr_col = lists_column_view{curr_col}.child(); }
-  return curr_col;
-}
+  LinkedColVector result;
+  for (column_view const &col : table) {
+    result.emplace_back(std::make_shared<linked_column_view>(col));
+  }
 
-}  // namespace
+  return result;
+}
 
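The parent pointer is the whole point of this wrapper: schema construction needs to walk from a leaf back up through its ancestors. A hypothetical helper (not part of this change) illustrating that traversal:

    // Depth of a node in the column hierarchy = number of parent links to the root.
    int node_depth(linked_column_view const *node)
    {
      int depth = 0;
      for (auto p = node->parent; p != nullptr; p = p->parent) { ++depth; }
      return depth;
    }
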
 /**
- * @brief Helper kernel for converting string data/offsets into nvstrdesc
- * REMOVEME: Once we eliminate the legacy readers/writers, the kernels could be
- * made to use the native offset+data layout.
+ * @brief Extends SchemaElement to add members required in constructing parquet_column_view
+ *
+ * Added members are:
+ * 1. leaf_column: Pointer to leaf linked_column_view which points to the corresponding data stream
+ *    of a leaf schema node. For non-leaf struct node, this is nullptr.
+ * 2. stats_dtype: datatype for statistics calculation required for the data stream of a leaf node.
+ * 3. ts_scale: scale to multiply or divide timestamp by in order to convert timestamp to parquet
+ *    supported types
  */
-__global__ void stringdata_to_nvstrdesc(gpu::nvstrdesc_s *dst,
-                                        const size_type *offsets,
-                                        const char *strdata,
-                                        const uint32_t *nulls,
-                                        size_type column_size)
-{
-  size_type row = blockIdx.x * blockDim.x + threadIdx.x;
-  if (row < column_size) {
-    uint32_t is_valid = (nulls) ? (nulls[row >> 5] >> (row & 0x1f)) & 1 : 1;
-    size_t count;
-    const char *ptr;
-    if (is_valid) {
-      size_type cur  = offsets[row];
-      size_type next = offsets[row + 1];
-      ptr            = strdata + cur;
-      count          = (next > cur) ? next - cur : 0;
+struct schema_tree_node : public SchemaElement {
+  LinkedColPtr leaf_column;
+  statistics_dtype stats_dtype;
+  int32_t ts_scale;
+
+  // TODO(fut): Think about making schema a class that holds a vector of schema_tree_nodes. The
+  // function construct_schema_tree could be its constructor. It can have method to get the per
+  // column nullability given a schema node index corresponding to a leaf schema. Much easier than
+  // that is a method to get path in schema, given a leaf node
+};
+
+struct leaf_schema_fn {
+  schema_tree_node &col_schema;
+  LinkedColPtr const &col;
+  column_in_metadata const &col_meta;
+  bool timestamp_is_int96;
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, bool>::value, void> operator()()
+  {
+    col_schema.type        = Type::BOOLEAN;
+    col_schema.stats_dtype = statistics_dtype::dtype_bool;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, int8_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::INT_8;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int8;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, int16_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::INT_16;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int16;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, int32_t>::value, void> operator()()
+  {
+    col_schema.type        = Type::INT32;
+    col_schema.stats_dtype = statistics_dtype::dtype_int32;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, int64_t>::value, void> operator()()
+  {
+    col_schema.type        = Type::INT64;
+    col_schema.stats_dtype = statistics_dtype::dtype_int64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, uint8_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::UINT_8;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int8;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, uint16_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::UINT_16;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int16;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, uint32_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::UINT_32;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int32;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, uint64_t>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT64;
+    col_schema.converted_type = ConvertedType::UINT_64;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, float>::value, void> operator()()
+  {
+    col_schema.type        = Type::FLOAT;
+    col_schema.stats_dtype = statistics_dtype::dtype_float32;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, double>::value, void> operator()()
+  {
+    col_schema.type        = Type::DOUBLE;
+    col_schema.stats_dtype = statistics_dtype::dtype_float64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::string_view>::value, void> operator()()
+  {
+    col_schema.type           = Type::BYTE_ARRAY;
+    col_schema.converted_type = ConvertedType::UTF8;
+    col_schema.stats_dtype    = statistics_dtype::dtype_string;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::timestamp_D>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::DATE;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int32;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::timestamp_s>::value, void> operator()()
+  {
+    col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64;
+    col_schema.converted_type =
+      (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MILLIS;
+    col_schema.stats_dtype = statistics_dtype::dtype_timestamp64;
+    col_schema.ts_scale    = 1000;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::timestamp_ms>::value, void> operator()()
+  {
+    col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64;
+    col_schema.converted_type =
+      (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MILLIS;
+    col_schema.stats_dtype = statistics_dtype::dtype_timestamp64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::timestamp_us>::value, void> operator()()
+  {
+    col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64;
+    col_schema.converted_type =
+      (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MICROS;
+    col_schema.stats_dtype = statistics_dtype::dtype_timestamp64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::timestamp_ns>::value, void> operator()()
+  {
+    col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64;
+    col_schema.converted_type =
+      (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MICROS;
+    col_schema.stats_dtype = statistics_dtype::dtype_timestamp64;
+    col_schema.ts_scale    = -1000;  // negative value indicates division by absolute value
+  }
+
+  // unsupported outside cudf for parquet 1.0.
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::duration_D>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT32;
+    col_schema.converted_type = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::duration_s>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT64;
+    col_schema.converted_type = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+    col_schema.ts_scale       = 1000;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::duration_ms>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT64;
+    col_schema.converted_type = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+  }
+
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::duration_us>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT64;
+    col_schema.converted_type = ConvertedType::TIME_MICROS;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+  }
+
+  // unsupported outside cudf for parquet 1.0.
+  template <typename T>
+  std::enable_if_t<std::is_same<T, cudf::duration_ns>::value, void> operator()()
+  {
+    col_schema.type           = Type::INT64;
+    col_schema.converted_type = ConvertedType::TIME_MICROS;
+    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+    col_schema.ts_scale       = -1000;  // negative value indicates division by absolute value
+  }
+
+  template <typename T>
+  std::enable_if_t<cudf::is_fixed_point<T>(), void> operator()()
+  {
+    if (std::is_same<T, numeric::decimal32>::value) {
+      col_schema.type        = Type::INT32;
+      col_schema.stats_dtype = statistics_dtype::dtype_int32;
+    } else if (std::is_same<T, numeric::decimal64>::value) {
+      col_schema.type        = Type::INT64;
+      col_schema.stats_dtype = statistics_dtype::dtype_decimal64;
+    } else {
-      ptr   = nullptr;
-      count = 0;
+      CUDF_FAIL("Unsupported fixed point type for parquet writer");
     }
-    dst[row].ptr   = ptr;
-    dst[row].count = count;
+    col_schema.converted_type = ConvertedType::DECIMAL;
+    col_schema.decimal_scale = -col->type().scale();  // parquet and cudf disagree about scale signs
+    CUDF_EXPECTS(col_meta.is_decimal_precision_set(),
+                 "Precision must be specified for decimal columns");
+    CUDF_EXPECTS(col_meta.get_decimal_precision() >= col_schema.decimal_scale,
+                 "Precision must be equal to or greater than scale!");
+    col_schema.decimal_precision = col_meta.get_decimal_precision();
   }
-}
 
-/**
- * @brief Helper class that adds parquet-specific column info
- */
-class parquet_column_view {
- public:
-  /**
-   * @brief Constructor that extracts out the string position + length pairs
-   * for building dictionaries for string columns
-   */
-  explicit parquet_column_view(size_t id,
-                               column_view const &col,
-                               std::vector<bool> const &nullability,
-                               const table_metadata *metadata,
-                               bool int96_timestamps,
-                               std::vector<uint8_t> const &decimal_precision,
-                               uint &decimal_precision_idx,
-                               rmm::cuda_stream_view stream)
-    : _col(col),
-      _leaf_col(get_leaf_col(col)),
-      _id(id),
-      _string_type(_leaf_col.type().id() == type_id::STRING),
-      _list_type(col.type().id() == type_id::LIST),
-      _type_width((_string_type || _list_type) ? 0 : cudf::size_of(col.type())),
-      _row_count(col.size()),
-      _null_count(_leaf_col.null_count()),
-      _data(col.head<uint8_t>() + col.offset() * _type_width),
-      _nulls(_leaf_col.nullable() ?
_leaf_col.null_mask() : nullptr), - _offset(col.offset()), - _converted_type(ConvertedType::UNKNOWN), - _ts_scale(0), - _dremel_offsets(0, stream), - _rep_level(0, stream), - _def_level(0, stream), - _nullability(nullability) + template + std::enable_if_t(), void> operator()() { - switch (_leaf_col.type().id()) { - case cudf::type_id::INT8: - _physical_type = Type::INT32; - _converted_type = ConvertedType::INT_8; - _stats_dtype = statistics_dtype::dtype_int8; - break; - case cudf::type_id::INT16: - _physical_type = Type::INT32; - _converted_type = ConvertedType::INT_16; - _stats_dtype = statistics_dtype::dtype_int16; - break; - case cudf::type_id::INT32: - _physical_type = Type::INT32; - _stats_dtype = statistics_dtype::dtype_int32; - break; - case cudf::type_id::INT64: - _physical_type = Type::INT64; - _stats_dtype = statistics_dtype::dtype_int64; - break; - case cudf::type_id::UINT8: - _physical_type = Type::INT32; - _converted_type = ConvertedType::UINT_8; - _stats_dtype = statistics_dtype::dtype_int8; - break; - case cudf::type_id::UINT16: - _physical_type = Type::INT32; - _converted_type = ConvertedType::UINT_16; - _stats_dtype = statistics_dtype::dtype_int16; - break; - case cudf::type_id::UINT32: - _physical_type = Type::INT32; - _converted_type = ConvertedType::UINT_32; - _stats_dtype = statistics_dtype::dtype_int32; - break; - case cudf::type_id::UINT64: - _physical_type = Type::INT64; - _converted_type = ConvertedType::UINT_64; - _stats_dtype = statistics_dtype::dtype_int64; - break; - case cudf::type_id::FLOAT32: - _physical_type = Type::FLOAT; - _stats_dtype = statistics_dtype::dtype_float32; - break; - case cudf::type_id::FLOAT64: - _physical_type = Type::DOUBLE; - _stats_dtype = statistics_dtype::dtype_float64; - break; - case cudf::type_id::BOOL8: - _physical_type = Type::BOOLEAN; - _stats_dtype = statistics_dtype::dtype_bool; - break; - // unsupported outside cudf for parquet 1.0. - case cudf::type_id::DURATION_DAYS: - _physical_type = Type::INT32; - _converted_type = ConvertedType::TIME_MILLIS; - _stats_dtype = statistics_dtype::dtype_int64; - break; - case cudf::type_id::DURATION_SECONDS: - _physical_type = Type::INT64; - _converted_type = ConvertedType::TIME_MILLIS; - _stats_dtype = statistics_dtype::dtype_int64; - _ts_scale = 1000; - break; - case cudf::type_id::DURATION_MILLISECONDS: - _physical_type = Type::INT64; - _converted_type = ConvertedType::TIME_MILLIS; - _stats_dtype = statistics_dtype::dtype_int64; - break; - case cudf::type_id::DURATION_MICROSECONDS: - _physical_type = Type::INT64; - _converted_type = ConvertedType::TIME_MICROS; - _stats_dtype = statistics_dtype::dtype_int64; - break; - // unsupported outside cudf for parquet 1.0. - case cudf::type_id::DURATION_NANOSECONDS: - _physical_type = Type::INT64; - _converted_type = ConvertedType::TIME_MICROS; - _stats_dtype = statistics_dtype::dtype_int64; - _ts_scale = -1000; // negative value indicates division by absolute value - break; - case cudf::type_id::TIMESTAMP_DAYS: - _physical_type = Type::INT32; - _converted_type = ConvertedType::DATE; - _stats_dtype = statistics_dtype::dtype_int32; - break; - case cudf::type_id::TIMESTAMP_SECONDS: - _physical_type = int96_timestamps ? Type::INT96 : Type::INT64; - _converted_type = ConvertedType::TIMESTAMP_MILLIS; - _stats_dtype = statistics_dtype::dtype_timestamp64; - _ts_scale = 1000; - break; - case cudf::type_id::TIMESTAMP_MILLISECONDS: - _physical_type = int96_timestamps ? 
Type::INT96 : Type::INT64; - _converted_type = ConvertedType::TIMESTAMP_MILLIS; - _stats_dtype = statistics_dtype::dtype_timestamp64; - break; - case cudf::type_id::TIMESTAMP_MICROSECONDS: - _physical_type = int96_timestamps ? Type::INT96 : Type::INT64; - _converted_type = ConvertedType::TIMESTAMP_MICROS; - _stats_dtype = statistics_dtype::dtype_timestamp64; - break; - case cudf::type_id::TIMESTAMP_NANOSECONDS: - _physical_type = int96_timestamps ? Type::INT96 : Type::INT64; - _converted_type = ConvertedType::TIMESTAMP_MICROS; - _stats_dtype = statistics_dtype::dtype_timestamp64; - _ts_scale = -1000; // negative value indicates division by absolute value - break; - case cudf::type_id::STRING: - _physical_type = Type::BYTE_ARRAY; - _converted_type = ConvertedType::UTF8; - _stats_dtype = statistics_dtype::dtype_string; - break; - case cudf::type_id::DECIMAL32: - _physical_type = Type::INT32; - _converted_type = ConvertedType::DECIMAL; - _stats_dtype = statistics_dtype::dtype_int32; - _decimal_scale = -_leaf_col.type().scale(); // parquet and cudf disagree about scale signs - CUDF_EXPECTS(decimal_precision.size() > decimal_precision_idx, - "Not enough decimal precision values passed for data!"); - CUDF_EXPECTS(decimal_precision[decimal_precision_idx] >= _decimal_scale, - "Precision must be equal to or greater than scale!"); - _decimal_precision = decimal_precision[decimal_precision_idx++]; - break; - case cudf::type_id::DECIMAL64: - _physical_type = Type::INT64; - _converted_type = ConvertedType::DECIMAL; - _stats_dtype = statistics_dtype::dtype_decimal64; - _decimal_scale = -_leaf_col.type().scale(); // parquet and cudf disagree about scale signs - CUDF_EXPECTS(decimal_precision.size() > decimal_precision_idx, - "Not enough decimal precision values passed for data!"); - CUDF_EXPECTS(decimal_precision[decimal_precision_idx] >= _decimal_scale, - "Precision must be equal to or greater than scale!"); - _decimal_precision = decimal_precision[decimal_precision_idx++]; - break; - default: - _physical_type = UNDEFINED_TYPE; - _stats_dtype = dtype_none; - break; - } - size_type leaf_col_offset = col.offset(); - _data_count = col.size(); - if (_list_type) { - // Top level column's offsets are not applied to all children. Get the effective offset and - // size of the leaf column - // Calculate row offset into dremel data (repetition/definition values) and the respective - // definition and repetition levels - gpu::dremel_data dremel = gpu::get_dremel_data(col, _nullability, stream); - _dremel_offsets = std::move(dremel.dremel_offsets); - _rep_level = std::move(dremel.rep_level); - _def_level = std::move(dremel.def_level); - leaf_col_offset = dremel.leaf_col_offset; - _data_count = dremel.leaf_data_size; - _max_def_level = dremel.max_def_level; - - _type_width = (is_fixed_width(_leaf_col.type())) ? cudf::size_of(_leaf_col.type()) : 0; - _data = (is_fixed_width(_leaf_col.type())) - ? _leaf_col.head() + leaf_col_offset * _type_width - : nullptr; - - // Calculate nesting levels - column_view curr_col = col; - _nesting_levels = 0; - while (curr_col.type().id() == type_id::LIST) { - lists_column_view list_col(curr_col); - _nesting_levels++; - curr_col = list_col.child(); - } + CUDF_FAIL("This functor is only meant for physical data types"); + } - // Update level nullability if no nullability was passed in. 
- curr_col = col; - if (_nullability.empty()) { - while (curr_col.type().id() == type_id::LIST) { - lists_column_view list_col(curr_col); - _nullability.push_back(list_col.null_mask() != nullptr); - curr_col = list_col.child(); + template + std::enable_if_t(), void> operator()() + { + CUDF_FAIL("Dictionary columns are not supported for writing"); + } +}; + +/** + * @brief Construct schema from input columns and per-column input options + * + * Recursively traverses through linked_columns and corresponding metadata to construct schema tree. + * The resulting schema tree is stored in a vector in pre-order traversal order. + */ +std::vector construct_schema_tree(LinkedColVector const &linked_columns, + table_input_metadata const &metadata, + bool single_write_mode, + bool int96_timestamps) +{ + std::vector schema; + schema_tree_node root{}; + root.type = UNDEFINED_TYPE; + root.repetition_type = NO_REPETITION_TYPE; + root.name = "schema"; + root.num_children = linked_columns.size(); + root.parent_idx = -1; // root schema has no parent + schema.push_back(std::move(root)); + + std::function add_schema = + [&](LinkedColPtr const &col, column_in_metadata const &col_meta, size_t parent_idx) { + bool col_nullable = [&]() { + if (single_write_mode) { + return col->nullable(); + } else { + if (col_meta.is_nullability_defined()) { + if (col_meta.nullable() == false) { + CUDF_EXPECTS( + col->nullable() == false, + "Mismatch in metadata prescribed nullability and input column nullability. " + "Metadata for nullable input column cannot prescribe nullability = false"); + } + return col_meta.nullable(); + } else { + // For chunked write, when not provided nullability, we assume the worst case scenario + // that all columns are nullable. + return true; + } + } + }(); + + if (col->type().id() == type_id::STRUCT) { + // if struct, add current and recursively call for all children + schema_tree_node struct_schema{}; + struct_schema.repetition_type = + col_nullable ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; + + struct_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); + struct_schema.num_children = col->num_children(); + struct_schema.parent_idx = parent_idx; + schema.push_back(std::move(struct_schema)); + + auto struct_node_index = schema.size() - 1; + // for (auto child_it = col->children.begin(); child_it < col->children.end(); child_it++) { + // add_schema(*child_it, struct_node_index); + // } + CUDF_EXPECTS(col->num_children() == static_cast(col_meta.num_children()), + "Mismatch in number of child columns between input table and metadata"); + for (size_t i = 0; i < col->children.size(); ++i) { + add_schema(col->children[i], col_meta.child(i), struct_node_index); + } + } else if (col->type().id() == type_id::LIST) { + // List schema is denoted by two levels for each nesting level and one final level for leaf. + // The top level is the same name as the column name. + // So e.g. List> is denoted in the schema by + // "col_name" : { "list" : { "element" : { "list" : { "element" } } } } + + schema_tree_node list_schema_1{}; + list_schema_1.converted_type = ConvertedType::LIST; + list_schema_1.repetition_type = + col_nullable ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; + list_schema_1.name = (schema[parent_idx].name == "list") ? 
"element" : col_meta.get_name(); + list_schema_1.num_children = 1; + list_schema_1.parent_idx = parent_idx; + schema.push_back(std::move(list_schema_1)); + + schema_tree_node list_schema_2{}; + list_schema_2.repetition_type = FieldRepetitionType::REPEATED; + list_schema_2.name = "list"; + list_schema_2.num_children = 1; + list_schema_2.parent_idx = schema.size() - 1; // Parent is list_schema_1, last added. + schema.push_back(std::move(list_schema_2)); + + CUDF_EXPECTS(col_meta.num_children() == 2, + "List column's metadata should have exactly two children"); + + add_schema(col->children[lists_column_view::child_column_index], + col_meta.child(lists_column_view::child_column_index), + schema.size() - 1); + } else { + // if leaf, add current + if (col->type().id() == type_id::STRING) { + CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0, + "String column's corresponding metadata should have zero or two children"); + } else { + CUDF_EXPECTS(col_meta.num_children() == 0, + "Leaf column's corresponding metadata cannot have children"); } - _nullability.push_back(curr_col.null_mask() != nullptr); - } - stream.synchronize(); - } else { - if (_nullability.empty()) { _nullability = {col.nullable()}; } - _max_def_level = (_nullability[0]) ? 1 : 0; - } - if (_string_type && _data_count > 0) { - strings_column_view view{_leaf_col}; - _indexes = rmm::device_buffer(_data_count * sizeof(gpu::nvstrdesc_s), stream); - - stringdata_to_nvstrdesc<<<((_data_count - 1) >> 8) + 1, 256, 0, stream.value()>>>( - reinterpret_cast(_indexes.data()), - view.offsets().data() + leaf_col_offset, - view.chars().data(), - _nulls, - _data_count); - _data = _indexes.data(); - - stream.synchronize(); - } + schema_tree_node col_schema{}; - // Generating default name if name isn't present in metadata - if (metadata && _id < metadata->column_names.size()) { - _name = metadata->column_names[_id]; - } else { - _name = "_col" + std::to_string(_id); - } - _path_in_schema.push_back(_name); + bool timestamp_is_int96 = int96_timestamps or col_meta.is_enabled_int96_timestamps(); + + cudf::type_dispatcher(col->type(), + leaf_schema_fn{col_schema, col, col_meta, timestamp_is_int96}); + + col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; + col_schema.name = (schema[parent_idx].name == "list") ? 
"element" : col_meta.get_name(); + col_schema.parent_idx = parent_idx; + col_schema.leaf_column = col; + schema.push_back(col_schema); + } + }; + + CUDF_EXPECTS(metadata.column_metadata.size() == linked_columns.size(), + "Mismatch in the number of columns and the corresponding metadata elements"); + // Add all linked_columns to schema using parent_idx = 0 (root) + for (size_t i = 0; i < linked_columns.size(); ++i) { + add_schema(linked_columns[i], metadata.column_metadata[i], 0); } - auto is_string() const noexcept { return _string_type; } - auto is_list() const noexcept { return _list_type; } - size_t type_width() const noexcept { return _type_width; } - size_t row_count() const noexcept { return _row_count; } - size_t data_count() const noexcept { return _data_count; } - size_t null_count() const noexcept { return _null_count; } - bool nullable() const { return _nullability.back(); } - void const *data() const noexcept { return _data; } - uint32_t const *nulls() const noexcept { return _nulls; } - size_type offset() const noexcept { return _offset; } - bool level_nullable(size_t level) const { return _nullability[level]; } - int32_t decimal_scale() const noexcept { return _decimal_scale; } - uint8_t decimal_precision() const noexcept { return _decimal_precision; } - - // List related data - column_view cudf_col() const noexcept { return _col; } - column_view leaf_col() const noexcept { return _leaf_col; } - size_type nesting_levels() const noexcept { return _nesting_levels; } - size_type const *level_offsets() const noexcept { return _dremel_offsets.data(); } - uint8_t const *repetition_levels() const noexcept { return _rep_level.data(); } - uint8_t const *definition_levels() const noexcept { return _def_level.data(); } - uint16_t max_def_level() const noexcept { return _max_def_level; } - void set_def_level(uint16_t def_level) { _max_def_level = def_level; } - - auto name() const noexcept { return _name; } - auto physical_type() const noexcept { return _physical_type; } - auto converted_type() const noexcept { return _converted_type; } - auto stats_type() const noexcept { return _stats_dtype; } - int32_t ts_scale() const noexcept { return _ts_scale; } - void set_path_in_schema(std::vector path) { _path_in_schema = std::move(path); } - auto get_path_in_schema() const noexcept { return _path_in_schema; } - - // Dictionary management + return schema; +} + +/** + * @brief Class to store parquet specific information for one data stream. + * + * Contains information about a single data stream. In case of struct columns, a data stream is one + * of the child leaf columns that contains data. + * e.g. A column Struct> contains 2 data streams: + * - Struct + * - Struct> + * + */ +struct parquet_column_view { + parquet_column_view(schema_tree_node const &schema_node, + std::vector const &schema_tree, + rmm::cuda_stream_view stream); + + column_view leaf_column_view() const; + gpu::parquet_column_device_view get_device_view(); + + column_view cudf_column_view() const { return cudf_col; } + parquet::Type physical_type() const { return schema_node.type; } + + std::vector const &get_path_in_schema() { return path_in_schema; } + + // LIST related member functions + uint8_t max_def_level() const noexcept { return _max_def_level; } + uint8_t max_rep_level() const noexcept { return _max_rep_level; } + bool is_list() const noexcept { return _is_list; } + + // Dictionary related member functions uint32_t *get_dict_data() { return (_dict_data.size()) ? 
_dict_data.data().get() : nullptr; } uint32_t *get_dict_index() { return (_dict_index.size()) ? _dict_index.data().get() : nullptr; } void use_dictionary(bool use_dict) { _dictionary_used = use_dict; } @@ -448,56 +537,185 @@ class parquet_column_view { } private: - // cudf data column - column_view _col; - column_view _leaf_col; - - // Identifier within set of columns - size_t _id = 0; - bool _string_type = false; - bool _list_type = false; - - size_t _type_width = 0; - size_t _row_count = 0; - size_t _data_count = 0; - size_t _null_count = 0; - void const *_data = nullptr; - uint32_t const *_nulls = nullptr; - size_type _offset = 0; - - // parquet-related members - std::string _name{}; - Type _physical_type; - ConvertedType _converted_type; - statistics_dtype _stats_dtype; - int32_t _ts_scale; - std::vector _path_in_schema; - - // Dictionary-related members - bool _dictionary_used = false; - rmm::device_vector _dict_data; - rmm::device_vector _dict_index; + // Schema related members + schema_tree_node schema_node; + std::vector path_in_schema; + uint8_t _max_def_level = 0; + uint8_t _max_rep_level = 0; + rmm::device_uvector _d_nullability; + + column_view cudf_col; // List-related members + bool _is_list; rmm::device_uvector _dremel_offsets; ///< For each row, the absolute offset into the repetition and definition ///< level vectors. O(num rows) rmm::device_uvector _rep_level; rmm::device_uvector _def_level; - std::vector _nullability; - size_type _max_def_level = -1; - size_type _nesting_levels = 0; + std::vector _nullability; + size_type _data_count = 0; - // String-related members - rmm::device_buffer _indexes; - - // Decimal-related members - int32_t _decimal_scale = 0; - uint8_t _decimal_precision = 0; + // Dictionary related members + bool _dictionary_used = false; + rmm::device_vector _dict_data; + rmm::device_vector _dict_index; }; +parquet_column_view::parquet_column_view(schema_tree_node const &schema_node, + std::vector const &schema_tree, + rmm::cuda_stream_view stream) + : schema_node(schema_node), + _d_nullability(0, stream), + _dremel_offsets(0, stream), + _rep_level(0, stream), + _def_level(0, stream) +{ + // Construct single inheritance column_view from linked_column_view + auto curr_col = schema_node.leaf_column.get(); + column_view single_inheritance_cudf_col = *curr_col; + while (curr_col->parent) { + auto const &parent = *curr_col->parent; + + // For list columns, we still need to retain the offset child column. + auto children = + (parent.type().id() == type_id::LIST) + ? 
std::vector{parent.child(lists_column_view::offsets_column_index), + single_inheritance_cudf_col} + : std::vector{single_inheritance_cudf_col}; + + single_inheritance_cudf_col = column_view(parent.type(), + parent.size(), + parent.head(), + parent.null_mask(), + UNKNOWN_NULL_COUNT, + parent.offset(), + children); + + curr_col = curr_col->parent; + } + cudf_col = single_inheritance_cudf_col; + + // Construct path_in_schema by travelling up in the schema_tree + std::vector path; + auto curr_schema_node = schema_node; + do { + path.push_back(curr_schema_node.name); + if (curr_schema_node.parent_idx != -1) { + curr_schema_node = schema_tree[curr_schema_node.parent_idx]; + } + } while (curr_schema_node.parent_idx != -1); + path_in_schema = std::vector(path.crbegin(), path.crend()); + + // Calculate max definition level by counting the number of levels that are optional (nullable) + // and max repetition level by counting the number of REPEATED levels in this column's hierarchy + uint16_t max_def_level = 0; + uint16_t max_rep_level = 0; + curr_schema_node = schema_node; + while (curr_schema_node.parent_idx != -1) { + if (curr_schema_node.repetition_type == parquet::REPEATED or + curr_schema_node.repetition_type == parquet::OPTIONAL) { + ++max_def_level; + } + if (curr_schema_node.repetition_type == parquet::REPEATED) { ++max_rep_level; } + curr_schema_node = schema_tree[curr_schema_node.parent_idx]; + } + CUDF_EXPECTS(max_def_level < 256, "Definition levels above 255 are not supported"); + CUDF_EXPECTS(max_rep_level < 256, "Repetition levels above 255 are not supported"); + + _max_def_level = max_def_level; + _max_rep_level = max_rep_level; + + // Construct nullability vector using repetition_type from schema. + std::vector r_nullability; + curr_schema_node = schema_node; + while (curr_schema_node.parent_idx != -1) { + if (not curr_schema_node.is_stub()) { + r_nullability.push_back(curr_schema_node.repetition_type == FieldRepetitionType::OPTIONAL); + } + curr_schema_node = schema_tree[curr_schema_node.parent_idx]; + } + _nullability = std::vector(r_nullability.crbegin(), r_nullability.crend()); + // TODO(cp): Explore doing this for all columns in a single go outside this ctor. Maybe using + // hostdevice_vector. Currently this involves a cudaMemcpyAsync for each column. + _d_nullability = rmm::device_uvector(_nullability.size(), stream); + CUDA_TRY(cudaMemcpyAsync(_d_nullability.data(), + _nullability.data(), + _nullability.size() * sizeof(uint8_t), + cudaMemcpyHostToDevice, + stream.value())); + + _is_list = (_max_rep_level > 0); + + if (cudf_col.size() == 0) { return; } + + if (_is_list) { + // Top level column's offsets are not applied to all children.
Get the effective offset and + // size of the leaf column + // Calculate row offset into dremel data (repetition/definition values) and the respective + // definition and repetition levels + gpu::dremel_data dremel = gpu::get_dremel_data(cudf_col, _d_nullability, _nullability, stream); + _dremel_offsets = std::move(dremel.dremel_offsets); + _rep_level = std::move(dremel.rep_level); + _def_level = std::move(dremel.def_level); + _data_count = dremel.leaf_data_size; // Needed for knowing what size dictionary to allocate + + stream.synchronize(); + } else { + // For non-list struct, the size of the root column is the same as the size of the leaf column + _data_count = cudf_col.size(); + } +} + +column_view parquet_column_view::leaf_column_view() const +{ + auto col = cudf_col; + while (cudf::is_nested(col.type())) { + if (col.type().id() == type_id::LIST) { + col = col.child(lists_column_view::child_column_index); + } else if (col.type().id() == type_id::STRUCT) { + col = col.child(0); // Stored cudf_col has only one child if struct + } + } + return col; +} + +gpu::parquet_column_device_view parquet_column_view::get_device_view() +{ + column_view col = leaf_column_view(); + auto desc = gpu::parquet_column_device_view{}; // Zero out all fields + desc.stats_dtype = schema_node.stats_dtype; + desc.ts_scale = schema_node.ts_scale; + + // TODO (dm): Enable dictionary for list after refactor + if (physical_type() != BOOLEAN && physical_type() != UNDEFINED_TYPE && !is_list()) { + alloc_dictionary(_data_count); + desc.dict_index = get_dict_index(); + desc.dict_data = get_dict_data(); + } + + if (is_list()) { + desc.level_offsets = _dremel_offsets.data(); + desc.rep_values = _rep_level.data(); + desc.def_values = _def_level.data(); + } + desc.num_rows = cudf_col.size(); + desc.physical_type = static_cast(physical_type()); + auto count_bits = [](uint16_t number) { + int16_t nbits = 0; + while (number > 0) { + nbits++; + number >>= 1; + } + return nbits; + }; + desc.level_bits = count_bits(max_rep_level()) << 4 | count_bits(max_def_level()); + desc.nullability = _d_nullability.data(); + return desc; +} + void writer::impl::init_page_fragments(hostdevice_vector &frag, - hostdevice_vector &col_desc, + hostdevice_vector &col_desc, uint32_t num_columns, uint32_t num_fragments, uint32_t num_rows, @@ -513,12 +731,13 @@ void writer::impl::init_page_fragments(hostdevice_vector &fra frag.device_to_host(stream, true); } -void writer::impl::gather_fragment_statistics(statistics_chunk *frag_stats_chunk, - hostdevice_vector &frag, - hostdevice_vector &col_desc, - uint32_t num_columns, - uint32_t num_fragments, - uint32_t fragment_size) +void writer::impl::gather_fragment_statistics( + statistics_chunk *frag_stats_chunk, + hostdevice_vector &frag, + hostdevice_vector &col_desc, + uint32_t num_columns, + uint32_t num_fragments, + uint32_t fragment_size) { rmm::device_vector frag_stats_group(num_fragments * num_columns); @@ -534,11 +753,12 @@ void writer::impl::gather_fragment_statistics(statistics_chunk *frag_stats_chunk stream.synchronize(); } -void writer::impl::build_chunk_dictionaries(hostdevice_vector &chunks, - hostdevice_vector &col_desc, - uint32_t num_rowgroups, - uint32_t num_columns, - uint32_t num_dictionaries) +void writer::impl::build_chunk_dictionaries( + hostdevice_vector &chunks, + hostdevice_vector &col_desc, + uint32_t num_rowgroups, + uint32_t num_columns, + uint32_t num_dictionaries) { size_t dict_scratch_size = (size_t)num_dictionaries * gpu::kDictScratchSize; rmm::device_vector 
dict_scratch(dict_scratch_size / sizeof(uint32_t)); @@ -560,7 +780,7 @@ void writer::impl::build_chunk_dictionaries(hostdevice_vector &chunks, - hostdevice_vector &col_desc, + hostdevice_vector &col_desc, gpu::EncPage *pages, statistics_chunk *page_stats, statistics_chunk *frag_stats, @@ -651,10 +871,11 @@ writer::impl::impl(std::unique_ptr sink, stats_granularity_(options.get_stats_level()), int96_timestamps(options.is_enabled_int96_timestamps()), out_sink_(std::move(sink)), - decimal_precision(options.get_decimal_precision()), - single_write_mode(mode == SingleWriteMode::YES), - user_metadata(options.get_metadata()) + single_write_mode(mode == SingleWriteMode::YES) { + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); + } init_state(); } @@ -668,15 +889,12 @@ writer::impl::impl(std::unique_ptr sink, compression_(to_parquet_compression(options.get_compression())), stats_granularity_(options.get_stats_level()), int96_timestamps(options.is_enabled_int96_timestamps()), - decimal_precision(options.get_decimal_precision()), single_write_mode(mode == SingleWriteMode::YES), out_sink_(std::move(sink)) { - if (options.get_nullable_metadata() != nullptr) { - user_metadata_with_nullability = *options.get_nullable_metadata(); - user_metadata = &user_metadata_with_nullability; + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); } - init_state(); } @@ -695,148 +913,51 @@ void writer::impl::write(table_view const &table) { CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); - size_type num_columns = table.num_columns(); - size_type num_rows = table.num_rows(); + size_type num_rows = table.num_rows(); - // Wrapper around cudf columns to attach parquet-specific type info. - // Note : I wish we could do this in the begin() function but since the - // metadata is optional we would have no way of knowing how many columns - // we actually have. - std::vector parquet_columns; - parquet_columns.reserve(num_columns); // Avoids unnecessary re-allocation - - // because the repetition type is global (in the sense of, not per-rowgroup or per write_chunk() - // call) we cannot know up front if the user is going to end up passing tables with nulls/no nulls - // in the multiple write_chunk() case. so we'll do some special handling. - // The user can pass in information about the nullability of a column to be enforced across - // write_chunk() calls, in a flattened bool vector. Figure out that per column. - auto per_column_nullability = - (single_write_mode) - ? std::vector>{} - : get_per_column_nullability(table, user_metadata_with_nullability.column_nullable); - - uint decimal_precision_idx = 0; - - for (auto it = table.begin(); it < table.end(); ++it) { - const auto col = *it; - const auto current_id = parquet_columns.size(); - - // if the user is explicitly saying "I am only calling this once", assume the columns in this - // one table tell us everything we need to know about their nullability. - // Empty nullability means the writer figures out the nullability from the cudf columns. - auto const &this_column_nullability = - (single_write_mode) ? 
std::vector{} : per_column_nullability[current_id]; - - parquet_columns.emplace_back(current_id, - col, - this_column_nullability, - user_metadata, - int96_timestamps, - decimal_precision, - decimal_precision_idx, - stream); - } + if (not table_meta) { table_meta = std::make_unique(table); } - CUDF_EXPECTS(decimal_precision_idx == decimal_precision.size(), - "Too many decimal precision values!"); + // Fill unnamed columns' names in table_meta + std::function add_default_name = + [&](column_in_metadata &col_meta, std::string default_name) { + if (col_meta.get_name().empty()) col_meta.set_name(default_name); + for (size_type i = 0; i < col_meta.num_children(); ++i) { + add_default_name(col_meta.child(i), col_meta.get_name() + "_" + std::to_string(i)); + } + }; + for (size_t i = 0; i < table_meta->column_metadata.size(); ++i) { + add_default_name(table_meta->column_metadata[i], "_col" + std::to_string(i)); + } - // first call. setup metadata. num_rows will get incremented as write_chunk is - // called multiple times. - // Calculate the sum of depths of all list columns - size_type const list_col_depths = std::accumulate( - parquet_columns.cbegin(), parquet_columns.cend(), 0, [](size_type sum, auto const &col) { - return sum + col.nesting_levels(); - }); + auto vec = input_table_to_linked_columns(table); + auto schema_tree = construct_schema_tree(vec, *table_meta, single_write_mode, int96_timestamps); + // Construct parquet_column_views from the schema tree leaf nodes. + std::vector parquet_columns; - // Make schema with current table - std::vector this_table_schema; - { - // Each level of nesting requires two levels of Schema. The leaf level needs one schema element - this_table_schema.reserve(1 + num_columns + list_col_depths * 2); - SchemaElement root{}; - root.type = UNDEFINED_TYPE; - root.repetition_type = NO_REPETITION_TYPE; - root.name = "schema"; - root.num_children = num_columns; - this_table_schema.push_back(std::move(root)); - for (auto i = 0; i < num_columns; i++) { - auto &col = parquet_columns[i]; - if (col.is_list()) { - size_type nesting_depth = col.nesting_levels(); - // Each level of nesting requires two levels of Schema. The leaf level needs one schema - // element - std::vector list_schema(nesting_depth * 2 + 1); - for (size_type j = 0; j < nesting_depth; j++) { - // List schema is denoted by two levels for each nesting level and one final level for - // leaf. The top level is the same name as the column name. - // So e.g. List> is denoted in the schema by - // "col_name" : { "list" : { "element" : { "list" : { "element" } } } } - auto const group_idx = 2 * j; - auto const list_idx = 2 * j + 1; - - list_schema[group_idx].name = (j == 0) ? col.name() : "element"; - list_schema[group_idx].repetition_type = (col.level_nullable(j)) ? OPTIONAL : REQUIRED; - list_schema[group_idx].converted_type = ConvertedType::LIST; - list_schema[group_idx].num_children = 1; - - list_schema[list_idx].name = "list"; - list_schema[list_idx].repetition_type = REPEATED; - list_schema[list_idx].num_children = 1; - } - list_schema[nesting_depth * 2].name = "element"; - list_schema[nesting_depth * 2].repetition_type = - col.level_nullable(nesting_depth) ? OPTIONAL : REQUIRED; - auto const &physical_type = col.physical_type(); - list_schema[nesting_depth * 2].type = physical_type; - list_schema[nesting_depth * 2].converted_type = - physical_type == parquet::Type::INT96 ? 
ConvertedType::UNKNOWN : col.converted_type(); - list_schema[nesting_depth * 2].num_children = 0; - list_schema[nesting_depth * 2].decimal_precision = col.decimal_precision(); - list_schema[nesting_depth * 2].decimal_scale = col.decimal_scale(); - - std::vector path_in_schema; - std::transform( - list_schema.cbegin(), list_schema.cend(), std::back_inserter(path_in_schema), [](auto s) { - return s.name; - }); - col.set_path_in_schema(path_in_schema); - this_table_schema.insert(this_table_schema.end(), list_schema.begin(), list_schema.end()); - } else { - SchemaElement col_schema{}; - // Column metadata - auto const &physical_type = col.physical_type(); - col_schema.type = physical_type; - col_schema.converted_type = - physical_type == parquet::Type::INT96 ? ConvertedType::UNKNOWN : col.converted_type(); - - col_schema.repetition_type = - (col.max_def_level() == 1 || (single_write_mode && col.row_count() < (size_t)num_rows)) - ? OPTIONAL - : REQUIRED; - - col_schema.name = col.name(); - col_schema.num_children = 0; // Leaf node - col_schema.decimal_precision = col.decimal_precision(); - col_schema.decimal_scale = col.decimal_scale(); - - this_table_schema.push_back(std::move(col_schema)); - } - } + for (schema_tree_node const &schema_node : schema_tree) { + if (schema_node.leaf_column) { parquet_columns.emplace_back(schema_node, schema_tree, stream); } } + // Mass allocation of column_device_views for each parquet_column_view + std::vector cudf_cols; + cudf_cols.reserve(parquet_columns.size()); + for (auto const &parq_col : parquet_columns) { cudf_cols.push_back(parq_col.cudf_column_view()); } + table_view single_streams_table(cudf_cols); + size_type num_columns = single_streams_table.num_columns(); + + std::vector this_table_schema(schema_tree.begin(), schema_tree.end()); + if (md.version == 0) { md.version = 1; md.num_rows = num_rows; md.column_order_listsize = (stats_granularity_ != statistics_freq::STATISTICS_NONE) ? num_columns : 0; - if (user_metadata != nullptr) { - std::transform(user_metadata->user_data.begin(), - user_metadata->user_data.end(), - std::back_inserter(md.key_value_metadata), - [](auto const &kv) { - return KeyValue{kv.first, kv.second}; - }); - } + std::transform(table_meta->user_data.begin(), + table_meta->user_data.end(), + std::back_inserter(md.key_value_metadata), + [](auto const &kv) { + return KeyValue{kv.first, kv.second}; + }); md.schema = this_table_schema; } else { // verify the user isn't passing mismatched tables @@ -848,49 +969,17 @@ void writer::impl::write(table_view const &table) } // Create table_device_view so that corresponding column_device_view data // can be written into col_desc members - auto parent_column_table_device_view = table_device_view::create(table); + auto parent_column_table_device_view = table_device_view::create(single_streams_table); rmm::device_uvector leaf_column_views(0, stream); // Initialize column description - hostdevice_vector col_desc(num_columns, stream); - - // setup gpu column description. 
- // applicable to only this _write_chunk() call - for (auto i = 0; i < num_columns; i++) { - auto &col = parquet_columns[i]; - // GPU column description - auto *desc = &col_desc[i]; - *desc = gpu::EncColumnDesc{}; // Zero out all fields - desc->column_data_base = col.data(); - desc->valid_map_base = col.nulls(); - desc->column_offset = col.offset(); - desc->stats_dtype = col.stats_type(); - desc->ts_scale = col.ts_scale(); - // TODO (dm): Enable dictionary for list after refactor - if (col.physical_type() != BOOLEAN && col.physical_type() != UNDEFINED_TYPE && !col.is_list()) { - col.alloc_dictionary(col.data_count()); - desc->dict_index = col.get_dict_index(); - desc->dict_data = col.get_dict_data(); - } - if (col.is_list()) { - desc->level_offsets = col.level_offsets(); - desc->rep_values = col.repetition_levels(); - desc->def_values = col.definition_levels(); - } - desc->num_values = col.data_count(); - desc->num_rows = col.row_count(); - desc->physical_type = static_cast(col.physical_type()); - desc->converted_type = static_cast(col.converted_type()); - auto count_bits = [](uint16_t number) { - int16_t nbits = 0; - while (number > 0) { - nbits++; - number >>= 1; - } - return nbits; - }; - desc->level_bits = count_bits(col.nesting_levels()) << 4 | count_bits(col.max_def_level()); - } + hostdevice_vector col_desc(parquet_columns.size(), stream); + // This should've been `auto const&` but isn't since dictionary space is allocated when calling + // get_device_view(). Fix during dictionary refactor. + std::transform( + parquet_columns.begin(), parquet_columns.end(), col_desc.host_ptr(), [](auto &pcol) { + return pcol.get_device_view(); + }); // Init page fragments // 5000 is good enough for up to ~200-character strings. Longer strings will start producing @@ -909,7 +998,7 @@ void writer::impl::write(table_view const &table) if (fragments.size() != 0) { // Move column info to device col_desc.host_to_device(stream); - leaf_column_views = create_leaf_column_device_views( + leaf_column_views = create_leaf_column_device_views( col_desc, *parent_column_table_device_view, stream); init_page_fragments(fragments, col_desc, num_columns, num_fragments, num_rows, fragment_size); diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index f5e0f7408c5..b8532d755eb 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -44,7 +44,7 @@ namespace io { namespace detail { namespace parquet { // Forward internal classes -class parquet_column_view; +struct parquet_column_view; using namespace cudf::io::parquet; using namespace cudf::io; @@ -130,7 +130,7 @@ class writer::impl { * @param fragment_size Number of rows per fragment */ void init_page_fragments(hostdevice_vector& frag, - hostdevice_vector& col_desc, + hostdevice_vector& col_desc, uint32_t num_columns, uint32_t num_fragments, uint32_t num_rows, @@ -148,7 +148,7 @@ class writer::impl { */ void gather_fragment_statistics(statistics_chunk* dst_stats, hostdevice_vector& frag, - hostdevice_vector& col_desc, + hostdevice_vector& col_desc, uint32_t num_columns, uint32_t num_fragments, uint32_t fragment_size); @@ -162,7 +162,7 @@ class writer::impl { * @param num_dictionaries Total number of dictionaries */ void build_chunk_dictionaries(hostdevice_vector& chunks, - hostdevice_vector& col_desc, + hostdevice_vector& col_desc, uint32_t num_rowgroups, uint32_t num_columns, uint32_t num_dictionaries); @@ -178,7 +178,7 @@ class writer::impl { * @param num_stats_bfr Number of statistics buffers */ 
void init_encoder_pages(hostdevice_vector& chunks, - hostdevice_vector& col_desc, + hostdevice_vector& col_desc, gpu::EncPage* pages, statistics_chunk* page_stats, statistics_chunk* frag_stats, @@ -228,15 +228,9 @@ class writer::impl { // Overall file metadata. Filled in during the process and written during write_chunked_end() cudf::io::parquet::FileMetaData md; // optional user metadata - table_metadata_with_nullability user_metadata_with_nullability; - // only used in the write_chunked() case. copied from the (optionally) user supplied - // argument to write() - table_metadata const* user_metadata = nullptr; + std::unique_ptr table_meta; // to track if the output has been written to sink bool closed = false; - // vector of precision values for decimal writing. Exactly one entry - // per decimal column. - std::vector decimal_precision; // current write position for rowgroups/chunks std::size_t current_chunk_offset; // special parameter only used by detail::write() to indicate that we are guaranteeing diff --git a/cpp/src/io/utilities/column_utils.cuh b/cpp/src/io/utilities/column_utils.cuh index 4f41e846631..c08f42583ef 100644 --- a/cpp/src/io/utilities/column_utils.cuh +++ b/cpp/src/io/utilities/column_utils.cuh @@ -57,27 +57,24 @@ rmm::device_uvector create_leaf_column_device_views( auto leaf_columns = cudf::device_span{leaf_column_views}; auto iter = thrust::make_counting_iterator(0); - thrust::for_each(rmm::exec_policy(stream), - iter, - iter + parent_table_device_view.num_columns(), - [col_desc, parent_col_view = parent_table_device_view, leaf_columns] __device__( - size_type index) mutable { - column_device_view col = parent_col_view.column(index); - - if (col.type().id() == type_id::LIST) { - col_desc[index].parent_column = parent_col_view.begin() + index; - } else { - col_desc[index].parent_column = nullptr; - } - // traverse till leaf column - while (col.type().id() == type_id::LIST) { - col = col.child(lists_column_view::child_column_index); - } - // Store leaf_column to device storage - column_device_view *leaf_col_ptr = leaf_columns.begin() + index; - *leaf_col_ptr = col; - col_desc[index].leaf_column = leaf_col_ptr; - }); + thrust::for_each( + rmm::exec_policy(stream), + iter, + iter + parent_table_device_view.num_columns(), + [col_desc, parent_col_view = parent_table_device_view, leaf_columns] __device__( + size_type index) mutable { + col_desc[index].parent_column = parent_col_view.begin() + index; + column_device_view col = parent_col_view.column(index); + // traverse till leaf column + while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { + col = (col.type().id() == type_id::LIST) ? 
col.child(lists_column_view::child_column_index) + : col.child(0); + } + // Store leaf_column to device storage + column_device_view *leaf_col_ptr = leaf_columns.begin() + index; + *leaf_col_ptr = col; + col_desc[index].leaf_column = leaf_col_ptr; + }); return leaf_column_views; } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 420bdc3e3ba..995ee94472f 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -100,6 +100,25 @@ std::unique_ptr create_compressible_fixed_table(cudf::size_type num return create_fixed_table(num_columns, num_rows, include_validity, compressible_elements); } +void compare_metadata_equality(cudf::io::table_input_metadata in_meta, + cudf::io::table_metadata out_meta) +{ + std::function compare_names = + [&](cudf::io::column_name_info out_col, cudf::io::column_in_metadata in_col) { + if (not in_col.get_name().empty()) { EXPECT_EQ(out_col.name, in_col.get_name()); } + EXPECT_EQ(out_col.children.size(), in_col.num_children()); + for (size_t i = 0; i < out_col.children.size(); ++i) { + compare_names(out_col.children[i], in_col.child(i)); + } + }; + + EXPECT_EQ(out_meta.schema_info.size(), in_meta.column_metadata.size()); + + for (size_t i = 0; i < out_meta.schema_info.size(); ++i) { + compare_names(out_meta.schema_info[i], in_meta.column_metadata[i]); + } +} + // Base test fixture for tests struct ParquetWriterTest : public cudf::test::BaseFixture { }; @@ -308,16 +327,6 @@ TEST_F(ParquetWriterTest, MultiColumn) column_wrapper col6{col6_data, col6_data + num_rows, validity}; column_wrapper col7{col7_data, col7_data + num_rows, validity}; - cudf_io::table_metadata expected_metadata; - // expected_metadata.column_names.emplace_back("bools"); - expected_metadata.column_names.emplace_back("int8s"); - expected_metadata.column_names.emplace_back("int16s"); - expected_metadata.column_names.emplace_back("int32s"); - expected_metadata.column_names.emplace_back("floats"); - expected_metadata.column_names.emplace_back("doubles"); - expected_metadata.column_names.emplace_back("decimal32s"); - expected_metadata.column_names.emplace_back("decimal64s"); - std::vector> cols; // cols.push_back(col0.release()); cols.push_back(col1.release()); @@ -330,12 +339,20 @@ TEST_F(ParquetWriterTest, MultiColumn) auto expected = std::make_unique(std::move(cols)); EXPECT_EQ(7, expected->num_columns()); + cudf_io::table_input_metadata expected_metadata(*expected); + // expected_metadata.column_metadata[0].set_name( "bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(10); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view()) .metadata(&expected_metadata); - std::vector precisions = {10, 20}; - out_opts.set_decimal_precision(precisions); cudf_io::write_parquet(out_opts); cudf_io::parquet_reader_options in_opts = @@ -343,7 +360,7 @@ TEST_F(ParquetWriterTest, MultiColumn) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); 
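compare_metadata_equality above does a parallel pre-order walk of the two trees: child counts must always line up, but a name is only checked when it was explicitly set on the input, which is what lets these tests leave columns unnamed. A minimal round-trip sketch of that behavior, written as a hypothetical extra test reusing this file's fixtures and aliases (an illustration, not part of this change):

  TEST_F(ParquetWriterTest, MetadataShapeOnlySketch)
  {
    // Names left unset on purpose: the writer falls back to "_col0", and the
    // name check above skips unnamed nodes, so only the shapes are compared.
    column_wrapper<int> col{{1, 2, 3}};
    auto tbl = table_view({col});
    cudf_io::table_input_metadata in_meta(tbl);

    auto filepath = temp_env->get_temp_filepath("MetadataShapeOnlySketch.parquet");
    cudf_io::parquet_writer_options out_opts =
      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl)
        .metadata(&in_meta);
    cudf_io::write_parquet(out_opts);

    cudf_io::parquet_reader_options in_opts =
      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
    auto result = cudf_io::read_parquet(in_opts);
    compare_metadata_equality(in_meta, result.metadata);
  }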
- EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, MultiColumnWithNulls) @@ -390,16 +407,6 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; column_wrapper col7{col7_data, col7_data + num_rows, col7_mask}; - cudf_io::table_metadata expected_metadata; - // expected_metadata.column_names.emplace_back("bools"); - expected_metadata.column_names.emplace_back("int8s"); - expected_metadata.column_names.emplace_back("int16s"); - expected_metadata.column_names.emplace_back("int32s"); - expected_metadata.column_names.emplace_back("floats"); - expected_metadata.column_names.emplace_back("doubles"); - expected_metadata.column_names.emplace_back("decimal32s"); - expected_metadata.column_names.emplace_back("decimal64s"); - std::vector> cols; // cols.push_back(col0.release()); cols.push_back(col1.release()); @@ -412,12 +419,20 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto expected = std::make_unique
(std::move(cols)); EXPECT_EQ(7, expected->num_columns()); + cudf_io::table_input_metadata expected_metadata(*expected); + // expected_metadata.column_names.emplace_back("bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(9); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view()) .metadata(&expected_metadata); - std::vector precisions = {9, 20}; - out_opts.set_decimal_precision(precisions); cudf_io::write_parquet(out_opts); @@ -426,7 +441,10 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + // TODO: Need to be able to return metadata in tree form from reader so they can be compared. + // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not + // have any tests for it in C++ or Python. + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, Strings) @@ -443,11 +461,6 @@ TEST_F(ParquetWriterTest, Strings) column_wrapper col1{strings.begin(), strings.end()}; column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; - cudf_io::table_metadata expected_metadata; - expected_metadata.column_names.emplace_back("col_other"); - expected_metadata.column_names.emplace_back("col_string"); - expected_metadata.column_names.emplace_back("col_another"); - std::vector> cols; cols.push_back(col0.release()); cols.push_back(col1.release()); cols.push_back(col2.release()); auto expected = std::make_unique
(std::move(cols)); EXPECT_EQ(3, expected->num_columns()); + cudf_io::table_input_metadata expected_metadata(*expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + auto filepath = temp_env->get_temp_filepath("Strings.parquet"); cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view()) @@ -466,7 +484,7 @@ TEST_F(ParquetWriterTest, Strings) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, SlicedTable) @@ -479,7 +497,8 @@ TEST_F(ParquetWriterTest, SlicedTable) auto seq_col0 = random_values(num_rows); auto seq_col2 = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; column_wrapper col1{strings.begin(), strings.end()}; @@ -510,17 +529,64 @@ TEST_F(ParquetWriterTest, SlicedTable) }, valids2}; - cudf_io::table_metadata expected_metadata; - expected_metadata.column_names.emplace_back("col_other"); - expected_metadata.column_names.emplace_back("col_string"); - expected_metadata.column_names.emplace_back("col_another"); - expected_metadata.column_names.emplace_back("col_list"); - expected_metadata.column_names.emplace_back("col_multi_level_list"); + // Struct column + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351, 29, 15}, {1, 1, 1, 1, 1, 0, 1, 1}}; - auto expected = table_view({col0, col1, col2, col3, col4}); + auto col5 = cudf::test::structs_column_wrapper{{ages_col}, {1, 1, 1, 1, 0, 1, 1, 1}}; + // Struct/List mixed column + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + // [10] + // [11, 12] + lcw land{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}, {10}, {11, 12}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + // [[10]] + // [[13, 14], [15]] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + {lcw{10}}, + {{13, 14}, {15}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{land, flats}; + auto is_human = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, true, false, true, false}}; + auto col6 = cudf::test::structs_column_wrapper{{is_human, struct_1}}; + + auto expected = table_view({col0, col1, col2, col3, col4, col5, col6}); + + // auto expected_slice = expected; auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); + cudf_io::table_input_metadata expected_metadata(expected_slice); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + expected_metadata.column_metadata[3].set_name("col_list"); + expected_metadata.column_metadata[4].set_name("col_multi_level_list"); + expected_metadata.column_metadata[5].set_name("col_struct"); + expected_metadata.column_metadata[6].set_name("col_struct_list"); + 
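A note on the indices used for col5/col6 here and in the child naming that follows: the metadata tree built from expected_slice mirrors the cudf column tree exactly, offsets included. A struct's metadata node has one child per field, while a list's node always carries two children (offsets at index 0, element at index 1), which is what the "exactly two children" check in construct_schema_tree enforces. Illustrative assertions under that assumption (a sketch, not lines from the original test):

  // col6 is Struct<is_human, Struct<land, flats>>: one metadata child per field.
  EXPECT_EQ(2, expected_metadata.column_metadata[6].num_children());
  // col3 ("col_list") is a list column: offsets child plus element child.
  EXPECT_EQ(2, expected_metadata.column_metadata[3].num_children());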
expected_metadata.column_metadata[6].child(0).set_name("human?"); + expected_metadata.column_metadata[6].child(1).set_name("particulars"); + expected_metadata.column_metadata[6].child(1).child(0).set_name("land"); + expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); + auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice) @@ -532,7 +598,7 @@ TEST_F(ParquetWriterTest, SlicedTable) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, ListColumn) @@ -607,18 +673,18 @@ TEST_F(ParquetWriterTest, ListColumn) }, valids2}; - cudf_io::table_metadata expected_metadata; - expected_metadata.column_names.emplace_back("col_list_int_0"); - expected_metadata.column_names.emplace_back("col_list_list_int_1"); - expected_metadata.column_names.emplace_back("col_list_list_int_nullable_2"); - expected_metadata.column_names.emplace_back("col_list_list_nullable_double_nullable_3"); - // expected_metadata.column_names.emplace_back("col_list_list_uint16_4"); - expected_metadata.column_names.emplace_back("col_list_nullable_list_nullable_int_nullable_5"); - expected_metadata.column_names.emplace_back("col_list_list_string_6"); - expected_metadata.column_names.emplace_back("col_list_list_list_7"); - table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); + cudf_io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_list_int_0"); + expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); + expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); + expected_metadata.column_metadata[3].set_name("col_list_list_nullable_double_nullable_3"); + // expected_metadata.column_metadata[0].set_name("col_list_list_uint16_4"); + expected_metadata.column_metadata[4].set_name("col_list_nullable_list_nullable_int_nullable_5"); + expected_metadata.column_metadata[5].set_name("col_list_list_string_6"); + expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); + auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); auto out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) .metadata(&expected_metadata) @@ -630,7 +696,7 @@ TEST_F(ParquetWriterTest, ListColumn) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, MultiIndex) @@ -650,15 +716,6 @@ TEST_F(ParquetWriterTest, MultiIndex) column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; - cudf_io::table_metadata expected_metadata; - expected_metadata.column_names.emplace_back("int8s"); - expected_metadata.column_names.emplace_back("int16s"); - expected_metadata.column_names.emplace_back("int32s"); - expected_metadata.column_names.emplace_back("floats"); - expected_metadata.column_names.emplace_back("doubles"); - expected_metadata.user_data.insert( - {"pandas", "\"index_columns\": [\"floats\", \"doubles\"], \"column1\": [\"int8s\"]"}); - 
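The removed block above and its replacement below repeat the migration pattern used throughout this file; schematically, with a placeholder table_view `tbl_view` and the pandas payload elided (a sketch contrasting the two APIs, not code from this change):

  // Before: flat, order-based names on cudf::io::table_metadata.
  cudf_io::table_metadata old_meta;
  old_meta.column_names.emplace_back("int8s");
  old_meta.user_data.insert({"pandas", "..."});  // "..." = elided payload

  // After: a tree built from the table being written, one node per column;
  // user_data carries over unchanged.
  cudf_io::table_input_metadata new_meta(tbl_view);
  new_meta.column_metadata[0].set_name("int8s");
  new_meta.user_data.insert({"pandas", "..."});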
std::vector> cols; cols.push_back(col1.release()); cols.push_back(col2.release()); @@ -668,6 +725,15 @@ TEST_F(ParquetWriterTest, MultiIndex) auto expected = std::make_unique
(std::move(cols)); EXPECT_EQ(5, expected->num_columns()); + cudf_io::table_input_metadata expected_metadata(*expected); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.user_data.insert( + {"pandas", "\"index_columns\": [\"floats\", \"doubles\"], \"column1\": [\"int8s\"]"}); + auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view()) @@ -681,7 +747,7 @@ TEST_F(ParquetWriterTest, MultiIndex) auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, HostBuffer) @@ -692,14 +758,14 @@ TEST_F(ParquetWriterTest, HostBuffer) cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); column_wrapper col{seq_col.begin(), seq_col.end(), validity}; - cudf_io::table_metadata expected_metadata; - expected_metadata.column_names.emplace_back("col_other"); - std::vector> cols; cols.push_back(col.release()); const auto expected = std::make_unique
(std::move(cols)); EXPECT_EQ(1, expected->num_columns()); + cudf_io::table_input_metadata expected_metadata(*expected); + expected_metadata.column_metadata[0].set_name("col_other"); + std::vector out_buffer; cudf_io::parquet_writer_options out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), expected->view()) @@ -710,7 +776,7 @@ TEST_F(ParquetWriterTest, HostBuffer) const auto result = cudf_io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - EXPECT_EQ(expected_metadata.column_names, result.metadata.column_names); + compare_metadata_equality(expected_metadata, result.metadata); } TEST_F(ParquetWriterTest, NonNullable) @@ -730,6 +796,175 @@ TEST_F(ParquetWriterTest, NonNullable) CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } +TEST_F(ParquetWriterTest, Struct) +{ + // Struct> + + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto expected = table_view({*struct_2}); + + auto filepath = temp_env->get_temp_filepath("Struct.parquet"); + cudf_io::parquet_writer_options args = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf_io::write_parquet(args); + + cudf_io::parquet_reader_options read_args = + cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); + cudf_io::read_parquet(read_args); +} + +TEST_F(ParquetWriterTest, StructOfList) +{ + // Struct>, + // flats:List> + // > + // > + + auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + lcw land_unit{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col, land_unit, flats}, + {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + // cudf::test::print(struct_2->child(1).child(2)); + + auto expected = table_view({*struct_2}); + + cudf_io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + 
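Because child() returns a mutable reference, the chained calls that follow can equivalently be written against a named node; an equivalent sketch for the "particulars" subtree (an illustration, not part of the original test):

  auto &particulars = expected_metadata.column_metadata[0].child(1);
  particulars.set_name("particulars");
  particulars.child(0).set_name("weight");
  particulars.child(1).set_name("age");
  particulars.child(2).set_name("land_unit");
  particulars.child(3).set_name("flats");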
expected_metadata.column_metadata[0].child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + expected_metadata.column_metadata[0].child(1).child(2).set_name("land_unit"); + expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); + cudf_io::parquet_writer_options args = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf_io::write_parquet(args); + + cudf_io::parquet_reader_options read_args = + cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); + const auto result = cudf_io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + compare_metadata_equality(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, ListOfStruct) +{ + // List + // > + // > + + auto weight_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weight_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 2, 5, 5, 6}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto list_col = cudf::make_lists_column(num_list_rows, + std::move(list_offsets_column), + std::move(struct_2), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto expected = table_view({*list_col}); + + cudf_io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); + cudf_io::parquet_writer_options args = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf_io::write_parquet(args); + + cudf_io::parquet_reader_options read_args = + cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); + const auto result = cudf_io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + compare_metadata_equality(expected_metadata, result.metadata); +} + // custom data sink that supports device writes. uses plain file io. 
 class custom_test_data_sink : public cudf::io::data_sink {
  public:
@@ -1055,6 +1290,168 @@ TEST_F(ParquetChunkedWriterTest, ListColumn)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
 
+TEST_F(ParquetChunkedWriterTest, ListOfStruct)
+{
+  // Table 1
+  auto weight_1 = cudf::test::fixed_width_column_wrapper<float>{{57.5, 51.1, 15.3}};
+  auto ages_1 = cudf::test::fixed_width_column_wrapper<int32_t>{{30, 27, 5}};
+  auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1};
+  auto is_human_1 = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}};
+  auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}};
+
+  auto list_offsets_column_1 =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2, 3, 3}.release();
+  auto num_list_rows_1 = list_offsets_column_1->size() - 1;
+
+  auto list_col_1 = cudf::make_lists_column(num_list_rows_1,
+                                            std::move(list_offsets_column_1),
+                                            struct_2_1.release(),
+                                            cudf::UNKNOWN_NULL_COUNT,
+                                            {});
+
+  auto table_1 = table_view({*list_col_1});
+
+  // Table 2
+  auto weight_2 = cudf::test::fixed_width_column_wrapper<float>{{1.1, -1.0, -1.0}};
+  auto ages_2 = cudf::test::fixed_width_column_wrapper<int32_t>{{31, 351, 351}, {1, 1, 0}};
+  auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}};
+  auto is_human_2 = cudf::test::fixed_width_column_wrapper<bool>{{false, false, false}, {1, 1, 0}};
+  auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
+
+  auto list_offsets_column_2 =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 2, 3}.release();
+  auto num_list_rows_2 = list_offsets_column_2->size() - 1;
+
+  auto list_col_2 = cudf::make_lists_column(num_list_rows_2,
+                                            std::move(list_offsets_column_2),
+                                            struct_2_2.release(),
+                                            cudf::UNKNOWN_NULL_COUNT,
+                                            {});
+
+  auto table_2 = table_view({*list_col_2});
+
+  auto full_table = cudf::concatenate({table_1, table_2});
+
+  cudf_io::table_input_metadata expected_metadata(table_1);
+  expected_metadata.column_metadata[0].set_name("family");
+  expected_metadata.column_metadata[0].child(1).set_nullability(false);
+  expected_metadata.column_metadata[0].child(1).child(0).set_name("human?");
+  expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars");
+  expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight");
+  expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age");
+
+  auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet");
+  cudf_io::chunked_parquet_writer_options args =
+    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  args.set_metadata(&expected_metadata);
+  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+
+  cudf_io::parquet_reader_options read_opts =
+    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+  auto result = cudf_io::read_parquet(read_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
+  compare_metadata_equality(expected_metadata, result.metadata);
+}
+
+TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList)
+{
+  auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; });
+  auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; });
+
+  using lcw = cudf::test::lists_column_wrapper<int32_t>;
+
+  // Table 1 ===========================
+
+  // []
+  // [NULL, 2, NULL]
+  // [4, 5]
+  // NULL
+  lcw land_1{{{}, {{1, 2, 3}, valids}, {4, 5}, {}}, valids2};
+
+  // []
+  // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]]
+  // [[7, 8], []]
+  // [[]]
+  lcw flats_1{lcw{}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}}};
+
+  auto weight_1 = cudf::test::fixed_width_column_wrapper<float>{{57.5, 51.1, 15.3, 1.1}};
+  auto ages_1 = cudf::test::fixed_width_column_wrapper<int32_t>{{30, 27, 5, 31}};
+  auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1, land_1, flats_1};
+  auto is_human_1 = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false, false}};
+  auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}};
+
+  auto list_offsets_column_1 =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2, 3, 4}.release();
+  auto num_list_rows_1 = list_offsets_column_1->size() - 1;
+
+  auto list_col_1 = cudf::make_lists_column(num_list_rows_1,
+                                            std::move(list_offsets_column_1),
+                                            struct_2_1.release(),
+                                            cudf::UNKNOWN_NULL_COUNT,
+                                            {});
+
+  auto table_1 = table_view({*list_col_1});
+
+  // Table 2 ===========================
+
+  // []
+  // [7, 8, 9]
+  lcw land_2{{}, {7, 8, 9}};
+
+  // [[]]
+  // [[], [], []]
+  lcw flats_2{lcw{lcw{}}, lcw{lcw{}, lcw{}, lcw{}}};
+
+  auto weight_2 = cudf::test::fixed_width_column_wrapper<float>{{-1.0, -1.0}};
+  auto ages_2 = cudf::test::fixed_width_column_wrapper<int32_t>{{351, 351}, {1, 0}};
+  auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2, land_2, flats_2}, {0, 1}};
+  auto is_human_2 = cudf::test::fixed_width_column_wrapper<bool>{{false, false}, {1, 0}};
+  auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
+
+  auto list_offsets_column_2 =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 2}.release();
+  auto num_list_rows_2 = list_offsets_column_2->size() - 1;
+
+  auto list_col_2 = cudf::make_lists_column(num_list_rows_2,
+                                            std::move(list_offsets_column_2),
+                                            struct_2_2.release(),
+                                            cudf::UNKNOWN_NULL_COUNT,
+                                            {});
+
+  auto table_2 = table_view({*list_col_2});
+
+  auto full_table = cudf::concatenate({table_1, table_2});
+
+  cudf_io::table_input_metadata expected_metadata(table_1);
+  expected_metadata.column_metadata[0].set_name("family");
+  expected_metadata.column_metadata[0].child(1).set_nullability(false);
+  expected_metadata.column_metadata[0].child(1).child(0).set_name("human?");
+  expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars");
+  expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight");
+  expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age");
+  expected_metadata.column_metadata[0].child(1).child(1).child(2).set_name("land_unit");
+  expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats");
+
+  auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet");
+  cudf_io::chunked_parquet_writer_options args =
+    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  args.set_metadata(&expected_metadata);
+  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+
+  cudf_io::parquet_reader_options read_opts =
+    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+  auto result = cudf_io::read_parquet(read_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
+  compare_metadata_equality(expected_metadata, result.metadata);
+
+  // We specifically mentioned in input schema that struct_2 is non-nullable across chunked calls.
+  auto result_parent_list = result.tbl->get_column(0);
+  auto result_struct_2 = result_parent_list.child(cudf::lists_column_view::child_column_index);
+  EXPECT_EQ(result_struct_2.nullable(), false);
+}
+
 TEST_F(ParquetChunkedWriterTest, MismatchedTypes)
 {
   srand(31337);
@@ -1150,8 +1547,7 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructureList)
     cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
   cudf_io::parquet_chunked_writer writer(args);
   writer.write(tbl0);
-  CUDF_EXPECT_THROW_MESSAGE(writer.write(tbl1),
-                            "Mismatch in schema between multiple calls to write_chunk");
+  EXPECT_THROW(writer.write(tbl1), cudf::logic_error);
 }
 
 TEST_F(ParquetChunkedWriterTest, DifferentNullability)
@@ -1174,6 +1570,54 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
 
+TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct)
+{
+  // Struct<is_human:bool,
+  //        Struct<weight:float,
+  //               age:int
+  //              > (nullable)
+  //       > (non-nullable)
+
+  // Table 1: is_human and struct_1 are non-nullable but should be nullable when read back.
+  auto weight_1 = cudf::test::fixed_width_column_wrapper<float>{{57.5, 51.1, 15.3}};
+  auto ages_1 = cudf::test::fixed_width_column_wrapper<int32_t>{{30, 27, 5}};
+  auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1};
+  auto is_human_1 = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}};
+  auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}};
+  auto table_1 = cudf::table_view({struct_2_1});
+
+  // Table 2: struct_1 and is_human are nullable now so if we hadn't assumed worst case (nullable)
+  // when writing table_1, we would have wrong pages for it.
+  auto weight_2 = cudf::test::fixed_width_column_wrapper<float>{{1.1, -1.0, -1.0}};
+  auto ages_2 = cudf::test::fixed_width_column_wrapper<int32_t>{{31, 351, 351}, {1, 1, 0}};
+  auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}};
+  auto is_human_2 = cudf::test::fixed_width_column_wrapper<bool>{{false, false, false}, {1, 1, 0}};
+  auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
+  auto table_2 = cudf::table_view({struct_2_2});
+
+  auto full_table = cudf::concatenate({table_1, table_2});
+
+  cudf_io::table_input_metadata expected_metadata(table_1);
+  expected_metadata.column_metadata[0].set_name("being");
+  expected_metadata.column_metadata[0].child(0).set_name("human?");
+  expected_metadata.column_metadata[0].child(1).set_name("particulars");
+  expected_metadata.column_metadata[0].child(1).child(0).set_name("weight");
+  expected_metadata.column_metadata[0].child(1).child(1).set_name("age");
+
+  auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet");
+  cudf_io::chunked_parquet_writer_options args =
+    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  args.set_metadata(&expected_metadata);
+  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+
+  cudf_io::parquet_reader_options read_opts =
+    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+  auto result = cudf_io::read_parquet(read_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
+  compare_metadata_equality(expected_metadata, result.metadata);
+}
+
 TEST_F(ParquetChunkedWriterTest, ForcedNullability)
 {
   srand(31337);
@@ -1184,17 +1628,17 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability)
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet");
 
-  cudf::io::table_metadata_with_nullability nullable_metadata;
+  cudf_io::table_input_metadata metadata(*table1);
 
   // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst
   // and considers all columns nullable. However cudf::concatenate will not force nulls in case no
   // columns are nullable. To get the expected result, we tell the writer the nullability of all
   // columns in advance.
-  nullable_metadata.column_nullable.insert(nullable_metadata.column_nullable.begin(), 5, false);
+  for (auto& col_meta : metadata.column_metadata) { col_meta.set_nullability(false); }
 
   cudf_io::chunked_parquet_writer_options args =
     cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
-      .nullable_metadata(&nullable_metadata);
+      .metadata(&metadata);
   cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2);
 
   cudf_io::parquet_reader_options read_opts =
@@ -1213,8 +1657,6 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
 
   using lcw = cudf::test::lists_column_wrapper<int>;
 
-  cudf::io::table_metadata_with_nullability nullable_metadata;
-
   // COL0 ====================
   // [1, 2, 3]
   // []
@@ -1228,9 +1670,6 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
   // NULL
   lcw col01{{{7}, {}, {8, 9, 10, 11}, {}}, valids2};
 
-  nullable_metadata.column_nullable.push_back(true);   // List is nullable at first (root) level
-  nullable_metadata.column_nullable.push_back(false);  // non-nullable at second (leaf) level
-
   // COL1 (non-nested columns to test proper schema construction)
   size_t num_rows = static_cast<cudf::column_view>(col00).size();
   auto seq_col0 = random_values<int>(num_rows);
   auto seq_col1 = random_values<int>(num_rows);
 
   column_wrapper<int> col10{seq_col0.begin(), seq_col0.end(), valids};
   column_wrapper<int> col11{seq_col1.begin(), seq_col1.end(), valids2};
 
-  nullable_metadata.column_nullable.push_back(true);
-
   auto table1 = table_view({col00, col10});
   auto table2 = table_view({col01, col11});
   auto full_table = cudf::concatenate({table1, table2});
 
+  cudf_io::table_input_metadata metadata(table1);
+  metadata.column_metadata[0].set_nullability(true);  // List is nullable at first (root) level
+  metadata.column_metadata[0].child(1).set_nullability(
+    false);  // non-nullable at second (leaf) level
+  metadata.column_metadata[1].set_nullability(true);
+
   auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet");
 
   cudf_io::chunked_parquet_writer_options args =
     cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
-      .nullable_metadata(&nullable_metadata);
+      .metadata(&metadata);
   cudf_io::parquet_chunked_writer(args).write(table1).write(table2);
 
   cudf_io::parquet_reader_options read_opts =
@@ -1260,30 +1703,50 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
 
-TEST_F(ParquetChunkedWriterTest, WrongNullability)
+TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct)
 {
-  srand(31337);
-  auto table1 = create_random_fixed_table<int>(5, 5, false);
+  // Struct<is_human:bool,
+  //        Struct<weight:float,
+  //               age:int
+  //              > (nullable)
+  //       > (non-nullable)
+
+  // Table 1: is_human and struct_2 are non-nullable and should stay that way when read back.
+  auto weight_1 = cudf::test::fixed_width_column_wrapper<float>{{57.5, 51.1, 15.3}};
+  auto ages_1 = cudf::test::fixed_width_column_wrapper<int32_t>{{30, 27, 5}};
+  auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1};
+  auto is_human_1 = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}};
+  auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}};
+  auto table_1 = cudf::table_view({struct_2_1});
+
+  auto weight_2 = cudf::test::fixed_width_column_wrapper<float>{{1.1, -1.0, -1.0}};
+  auto ages_2 = cudf::test::fixed_width_column_wrapper<int32_t>{{31, 351, 351}, {1, 1, 0}};
+  auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}};
+  auto is_human_2 = cudf::test::fixed_width_column_wrapper<bool>{{false, false, false}};
+  auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
+  auto table_2 = cudf::table_view({struct_2_2});
+
+  auto full_table = cudf::concatenate({table_1, table_2});
+
+  cudf_io::table_input_metadata expected_metadata(table_1);
+  expected_metadata.column_metadata[0].set_name("being").set_nullability(false);
+  expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false);
+  expected_metadata.column_metadata[0].child(1).set_name("particulars");
+  expected_metadata.column_metadata[0].child(1).child(0).set_name("weight");
+  expected_metadata.column_metadata[0].child(1).child(1).set_name("age");
+
+  auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet");
+  cudf_io::chunked_parquet_writer_options args =
+    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  args.set_metadata(&expected_metadata);
+  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
 
-  auto filepath = temp_env->get_temp_filepath("ChunkedWrongNullable.parquet");
+  cudf_io::parquet_reader_options read_opts =
+    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+  auto result = cudf_io::read_parquet(read_opts);
 
-  cudf::io::table_metadata_with_nullability nullable_metadata;
-  // Number of columns with mask in table (i.e 5) and size of column nullability (i.e 6), are
-  // mismatching.
-  nullable_metadata.column_nullable.insert(nullable_metadata.column_nullable.begin(), 6, true);
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
-      .nullable_metadata(&nullable_metadata);
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args).write(*table1), cudf::logic_error);
-
-  nullable_metadata.column_nullable.clear();
-  // Number of columns with mask in table (i.e 5) and size of column nullability (i.e 4), are
-  // mismatching.
-  nullable_metadata.column_nullable.insert(nullable_metadata.column_nullable.begin(), 4, true);
-  cudf_io::chunked_parquet_writer_options args2 =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
-      .nullable_metadata(&nullable_metadata);
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args2).write(*table1), cudf::logic_error);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
+  compare_metadata_equality(expected_metadata, result.metadata);
 }
 
 TEST_F(ParquetChunkedWriterTest, ReadRowGroups)
@@ -1328,7 +1791,7 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError)
   EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
 }
 
-TEST_F(ParquetChunkedWriterTest, DecimalWrite)
+TEST_F(ParquetWriterTest, DecimalWrite)
 {
   constexpr cudf::size_type num_rows = 500;
   auto seq_col0 = random_values<int32_t>(num_rows);
@@ -1345,36 +1808,25 @@
   auto table = table_view({col0, col1});
 
   auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf_io::parquet_writer_options args =
+    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table);
 
   // verify failure if no decimal precision given
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args).write(table), cudf::logic_error);
+  EXPECT_THROW(cudf_io::write_parquet(args), cudf::logic_error);
+
+  cudf_io::table_input_metadata expected_metadata(table);
 
   // verify failure if too small a precision is given
-  std::vector<uint8_t> precisions{7, 1};
-  args.set_decimal_precision_data(precisions);
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args).write(table), cudf::logic_error);
-
-  // verify failure if too few precisions given
-  precisions.pop_back();
-  args.set_decimal_precision_data(precisions);
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args).write(table), cudf::logic_error);
-
-  // verify sucess if equal precision is given
-  precisions = {7, 9};
-  args.set_decimal_precision_data(precisions);
-  cudf_io::parquet_chunked_writer(args).write(table);
-
-  // verify failure if too many precisions given
-  precisions = {7, 14, 11};
-  args.set_decimal_precision_data(precisions);
-  EXPECT_THROW(cudf_io::parquet_chunked_writer(args).write(table), cudf::logic_error);
-
-  // write correctly
-  precisions.pop_back();
-  args.set_decimal_precision_data(precisions);
-  cudf_io::parquet_chunked_writer(args).write(table);
+  expected_metadata.column_metadata[0].set_decimal_precision(7);
+  expected_metadata.column_metadata[1].set_decimal_precision(1);
+  args.set_metadata(&expected_metadata);
+  EXPECT_THROW(cudf_io::write_parquet(args), cudf::logic_error);
+
+  // verify success if equal precision is given
+  expected_metadata.column_metadata[0].set_decimal_precision(7);
+  expected_metadata.column_metadata[1].set_decimal_precision(9);
+  args.set_metadata(&expected_metadata);
+  cudf_io::write_parquet(args);
 
   cudf_io::parquet_reader_options read_opts =
     cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
@@ -1744,9 +2196,9 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
   cudf::table_view tbl{{a, b}};
 
   auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet");
-  cudf_io::table_metadata md;
-  md.column_names.push_back("a");
-  md.column_names.push_back("b");
+  cudf_io::table_input_metadata md(tbl);
+  md.column_metadata[0].set_name("a");
+  md.column_metadata[1].set_name("b");
   cudf_io::parquet_writer_options opts =
     cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
   cudf_io::write_parquet(opts);
@@ -1766,9 +2218,9 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
   cudf::table_view tbl{{a, b}};
 
   auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet");
-  cudf_io::table_metadata md;
-  md.column_names.push_back("a");
-  md.column_names.push_back("b");
+  cudf_io::table_input_metadata md(tbl);
+  md.column_metadata[0].set_name("a");
+  md.column_metadata[1].set_name("b");
   cudf_io::parquet_writer_options opts =
     cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
   cudf_io::write_parquet(opts);
@@ -1791,11 +2243,11 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
   cudf::table_view tbl{{a, b, c, d}};
 
   auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet");
-  cudf_io::table_metadata md;
-  md.column_names.push_back("a");
-  md.column_names.push_back("b");
-  md.column_names.push_back("c");
-  md.column_names.push_back("d");
+  cudf_io::table_input_metadata md(tbl);
+  md.column_metadata[0].set_name("a");
+  md.column_metadata[1].set_name("b");
+  md.column_metadata[2].set_name("c");
+  md.column_metadata[3].set_name("d");
   cudf_io::parquet_writer_options opts =
     cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
   cudf_io::write_parquet(opts);
@@ -2205,4 +2657,5 @@ TEST_F(ParquetReaderTest, DecimalRead)
     EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
   }
 }
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/python/cudf/cudf/_lib/cpp/io/parquet.pxd b/python/cudf/cudf/_lib/cpp/io/parquet.pxd
index f7f094834e6..519565fa48c 100644
--- a/python/cudf/cudf/_lib/cpp/io/parquet.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/parquet.pxd
@@ -3,6 +3,7 @@
 from libcpp cimport bool
 from libcpp.string cimport string
 from libcpp.vector cimport vector
+from libcpp.map cimport map
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libc.stdint cimport uint8_t
@@ -64,17 +65,35 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
     cdef cudf_io_types.table_with_metadata read_parquet(
         parquet_reader_options args) except +
 
+    cdef cppclass column_in_metadata:
+        column_in_metadata& set_name(const string& name)
+        column_in_metadata& set_nullability(bool nullable)
+        column_in_metadata& set_list_column_as_map()
+        column_in_metadata& set_int96_timestamps(bool req)
+        column_in_metadata& child(size_type i)
+
+    cdef cppclass table_input_metadata:
+        table_input_metadata() except +
+        table_input_metadata(const cudf_table_view.table_view& table) except +
+        table_input_metadata(
+            const cudf_table_view.table_view& table,
+            map[string, string] user_data
+        ) except +
+
+        vector[column_in_metadata] column_metadata
+        map[string, string] user_data
+
     cdef cppclass parquet_writer_options:
         parquet_writer_options() except +
         cudf_io_types.sink_info get_sink_info() except +
         cudf_io_types.compression_type get_compression() except +
         cudf_io_types.statistics_freq get_stats_level() except +
         cudf_table_view.table_view get_table() except +
-        const cudf_io_types.table_metadata get_metadata() except +
+        const table_input_metadata get_metadata() except +
        string get_column_chunks_file_path() except+
 
        void set_metadata(
-            cudf_io_types.table_metadata *m
+            table_input_metadata *m
        ) except +
        void set_stats_level(
            cudf_io_types.statistics_freq sf
@@ -100,7 +119,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
             cudf_table_view.table_view table_
         ) except +
         parquet_writer_options_builder& metadata(
-            cudf_io_types.table_metadata *m
+            table_input_metadata *m
         ) except +
         parquet_writer_options_builder& stats_level(
             cudf_io_types.statistics_freq sf
@@ -126,11 +145,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         cudf_io_types.sink_info get_sink() except +
         cudf_io_types.compression_type get_compression() except +
         cudf_io_types.statistics_freq get_stats_level() except +
-        cudf_io_types.table_metadata_with_nullability* get_nullable_metadata(
+        table_input_metadata* get_metadata(
         ) except+
 
-        void set_nullable_metadata(
-            cudf_io_types.table_metadata_with_nullability *m
+        void set_metadata(
+            table_input_metadata *m
         ) except +
         void set_stats_level(
             cudf_io_types.statistics_freq sf
@@ -149,8 +168,8 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         chunked_parquet_writer_options_builder(
             cudf_io_types.sink_info sink_,
         ) except +
-        chunked_parquet_writer_options_builder& nullable_metadata(
-            cudf_io_types.table_metadata_with_nullability *m
+        chunked_parquet_writer_options_builder& metadata(
+            table_input_metadata *m
         ) except +
         chunked_parquet_writer_options_builder& stats_level(
             cudf_io_types.statistics_freq sf
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index a9739a02283..87179c02fe2 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -48,6 +48,8 @@ from cudf._lib.cpp.table.table_view cimport (
 from cudf._lib.cpp.io.parquet cimport (
     read_parquet as parquet_reader,
     parquet_reader_options,
+    table_input_metadata,
+    column_in_metadata,
     parquet_writer_options,
     write_parquet as parquet_writer,
     parquet_chunked_writer as cpp_parquet_chunked_writer,
@@ -284,10 +286,8 @@ cpdef write_parquet(
     """
     # Create the write options
-    cdef unique_ptr[cudf_io_types.table_metadata] tbl_meta = \
-        make_unique[cudf_io_types.table_metadata]()
+    cdef unique_ptr[table_input_metadata] tbl_meta
 
-    cdef vector[string] column_names
     cdef map[string, string] user_data
     cdef table_view tv
     cdef unique_ptr[cudf_io_types.data_sink] _data_sink
@@ -295,23 +295,29 @@
 
     if index is not False and not isinstance(table._index, cudf.RangeIndex):
         tv = table.view()
+        tbl_meta = make_unique[table_input_metadata](tv)
         for level, idx_name in enumerate(table._index.names):
-            column_names.push_back(
+            tbl_meta.get().column_metadata[level].set_name(
                 str.encode(
                     _index_level_name(idx_name, level, table._column_names)
                 )
             )
+        num_index_cols_meta = len(table._index.names)
     else:
         tv = table.data_view()
+        tbl_meta = make_unique[table_input_metadata](tv)
+        num_index_cols_meta = 0
 
-    for col_name in table._column_names:
-        column_names.push_back(str.encode(col_name))
+    for i, name in enumerate(table._column_names, num_index_cols_meta):
+        tbl_meta.get().column_metadata[i].set_name(name.encode())
+        _set_col_children_names(
+            table[name]._column, tbl_meta.get().column_metadata[i]
+        )
 
     pandas_metadata = generate_pandas_metadata(table, index)
     user_data[str.encode("pandas")] = str.encode(pandas_metadata)
 
     # Set the table_metadata
-    tbl_meta.get().column_names = column_names
     tbl_meta.get().user_data = user_data
 
     cdef cudf_io_types.compression_type comp_type = _get_comp_type(compression)
@@ -357,6 +363,7 @@ cdef class ParquetWriter:
     """
     cdef bool initialized
     cdef unique_ptr[cpp_parquet_chunked_writer] writer
+    cdef unique_ptr[table_input_metadata] tbl_meta
     cdef cudf_io_types.sink_info sink
     cdef unique_ptr[cudf_io_types.data_sink] _data_sink
     cdef cudf_io_types.statistics_freq stat_freq
@@ -416,20 +423,44 @@ cdef class ParquetWriter:
     def _initialize_chunked_state(self, Table table):
         """ Prepares all the values required to build the
             chunked_parquet_writer_options and creates a writer"""
-        cdef unique_ptr[cudf_io_types.table_metadata_with_nullability] tbl_meta
-        tbl_meta = make_unique[cudf_io_types.table_metadata_with_nullability]()
+        cdef table_view tv
 
         # Set the table_metadata
-        tbl_meta.get().column_names = get_column_names(table, self.index)
+        num_index_cols_meta = 0
+        self.tbl_meta = make_unique[table_input_metadata](table.data_view())
+        if self.index is not False:
+            if isinstance(table._index, cudf.core.multiindex.MultiIndex):
+                tv = table.view()
+                self.tbl_meta = make_unique[table_input_metadata](tv)
+                for level, idx_name in enumerate(table._index.names):
+                    self.tbl_meta.get().column_metadata[level].set_name(
+                        (str.encode(idx_name))
+                    )
+                num_index_cols_meta = len(table._index.names)
+            else:
+                if table._index.name is not None:
+                    tv = table.view()
+                    self.tbl_meta = make_unique[table_input_metadata](tv)
+                    self.tbl_meta.get().column_metadata[0].set_name(
+                        str.encode(table._index.name)
+                    )
+                    num_index_cols_meta = 1
+
+        for i, name in enumerate(table._column_names, num_index_cols_meta):
+            self.tbl_meta.get().column_metadata[i].set_name(name.encode())
+            _set_col_children_names(
+                table[name]._column, self.tbl_meta.get().column_metadata[i]
+            )
+
         pandas_metadata = generate_pandas_metadata(table, self.index)
-        tbl_meta.get().user_data[str.encode("pandas")] = \
+        self.tbl_meta.get().user_data[str.encode("pandas")] = \
             str.encode(pandas_metadata)
 
         cdef chunked_parquet_writer_options args
         with nogil:
             args = move(
                 chunked_parquet_writer_options.builder(self.sink)
-                .nullable_metadata(tbl_meta.get())
+                .metadata(self.tbl_meta.get())
                 .compression(self.comp_type)
                 .stats_level(self.stat_freq)
                 .build()
@@ -514,3 +545,15 @@ cdef Column _update_column_struct_field_names(
         )
     col.set_base_children(tuple(children))
     return col
+
+cdef _set_col_children_names(Column col, column_in_metadata& col_meta):
+    if is_struct_dtype(col):
+        for i, (child_col, name) in enumerate(
+            zip(col.children, list(col.dtype.fields))
+        ):
+            col_meta.child(i).set_name(name.encode())
+            _set_col_children_names(child_col, col_meta.child(i))
+    elif is_list_dtype(col):
+        _set_col_children_names(col.children[1], col_meta.child(1))
+    else:
+        return
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 4c4ef17c6b9..6698a47b416 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -22,6 +22,7 @@ from cudf.utils.dtypes import (
     np_to_pa_dtype,
     is_categorical_dtype,
     is_list_dtype,
+    is_struct_dtype,
 )
 
@@ -79,7 +80,7 @@ cpdef generate_pandas_metadata(Table table, index):
             "'category' column dtypes are currently not "
             + "supported by the gpu accelerated parquet writer"
         )
-    elif is_list_dtype(col):
+    elif is_list_dtype(col) or is_struct_dtype(col):
         types.append(col.dtype.to_arrow())
     else:
         types.append(np_to_pa_dtype(col.dtype))
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index ecdce9443a1..8d1b55f61de 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7318,15 +7318,6 @@ def to_parquet(self, path, *args, **kwargs):
         """{docstring}"""
         from cudf.io import parquet as pq
 
-        if any(
-            isinstance(col, cudf.core.column.StructColumn)
-            for col in self._data.columns
-        ):
-            raise NotImplementedError(
-                "Writing to parquet format is not yet supported "
-                "with Struct columns."
-            )
-
         return pq.to_parquet(self, path, *args, **kwargs)
 
     @ioutils.doc_to_feather()
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index dc4d0615a7f..6d50e4b6fee 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1834,3 +1834,89 @@ def test_parquet_writer_list_statistics(tmpdir):
         actual_max = cudf.Series(pd_slice[col].explode().explode()).max()
         stats_max = stats.max
         assert normalized_equals(actual_max, stats_max)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # Structs
+        {
+            "being": [
+                None,
+                {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}},
+                {"human?": None, "Deets": {"Name": "Angua", "Age": 25}},
+                {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}},
+                {"human?": False, "Deets": None},
+                {"human?": None, "Deets": {"Name": "Mr", "Age": None}},
+            ]
+        },
+        # List of Structs
+        pytest.param(
+            {
+                "family": [
+                    [
+                        None,
+                        {"human?": True, "deets": {"weight": 2.4, "age": 27}},
+                    ],
+                    [
+                        {"human?": None, "deets": {"weight": 5.3, "age": 25}},
+                        {"human?": False, "deets": {"weight": 8.0, "age": 31}},
+                        {"human?": False, "deets": None},
+                    ],
+                    [],
+                    [{"human?": None, "deets": {"weight": 6.9, "age": None}}],
+                ]
+            },
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/7561"
+            ),
+        ),
+        # Struct of Lists
+        pytest.param(
+            {
+                "Real estate records": [
+                    None,
+                    {
+                        "Status": "NRI",
+                        "Ownerships": {
+                            "land_unit": [None, 2, None],
+                            "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]],
+                        },
+                    },
+                    {
+                        "Status": None,
+                        "Ownerships": {
+                            "land_unit": [4, 5],
+                            "flats": [[7, 8], []],
+                        },
+                    },
+                    {
+                        "Status": "RI",
+                        "Ownerships": {"land_unit": None, "flats": [[]]},
+                    },
+                    {"Status": "RI", "Ownerships": None},
+                    {
+                        "Status": None,
+                        "Ownerships": {
+                            "land_unit": [7, 8, 9],
+                            "flats": [[], [], []],
+                        },
+                    },
+                ]
+            },
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/7562"
+            ),
+        ),
+    ],
+)
+def test_parquet_writer_nested(tmpdir, data):
+    expect = pd.DataFrame(data)
+    gdf = cudf.from_pandas(expect)
+
+    fname = tmpdir.join("test_parquet_writer_nested.parquet")
+    gdf.to_parquet(fname)
+    assert os.path.exists(fname)
+
+    got = pd.read_parquet(fname)
+    assert_eq(expect, got)
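
For quick reference, a minimal sketch of the caller-side flow this patch enables — not part of the diff itself; the helper name write_with_metadata and the column name "amount" are illustrative only, while all API calls (table_input_metadata, the chaining setters, set_decimal_precision, the options builder, write_parquet) come from the changes above:

// Illustrative sketch, not part of this change: build per-column metadata
// from a table_view, adjust it through the chaining setters, and pass it
// to the parquet writer options.
#include <cudf/io/parquet.hpp>
#include <cudf/table/table_view.hpp>
#include <string>

namespace cudf_io = cudf::io;

void write_with_metadata(cudf::table_view const& tbl, std::string const& path)
{
  // Metadata mirrors the structure of tbl: one entry per top-level column,
  // with children for nested (list/struct) columns.
  cudf_io::table_input_metadata meta(tbl);
  meta.column_metadata[0]
    .set_name("amount")          // setters return *this, so calls chain
    .set_nullability(false)      // honored by chunked writes; ignored for a single write
    .set_decimal_precision(9);   // required when the column is a decimal (fixed-point) type

  cudf_io::parquet_writer_options opts =
    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{path}, tbl).metadata(&meta);
  cudf_io::write_parquet(opts);
}

This replaces the old pattern of passing parallel vectors (column_names, column_nullable, decimal precisions) that had to be kept in sync with the table by hand; the structure-mirroring tree makes mismatches like the removed WrongNullability test impossible to express.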