diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 312a5243687..d276e946a51 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -309,10 +309,10 @@ class parquet_field_struct : public parquet_field { template class parquet_field_union_struct : public parquet_field { E& enum_val; - cuda::std::optional& val; // union structs are always wrapped in std::optional + std::optional& val; // union structs are always wrapped in std::optional public: - parquet_field_union_struct(int f, E& ev, cuda::std::optional& v) + parquet_field_union_struct(int f, E& ev, std::optional& v) : parquet_field(f), enum_val(ev), val(v) { } @@ -439,10 +439,10 @@ class parquet_field_struct_blob : public parquet_field { */ template class parquet_field_optional : public parquet_field { - cuda::std::optional& val; + std::optional& val; public: - parquet_field_optional(int f, cuda::std::optional& v) : parquet_field(f), val(v) {} + parquet_field_optional(int f, std::optional& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 7c985643887..2851ef67a65 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -20,8 +20,6 @@ #include -#include - #include #include #include @@ -94,10 +92,10 @@ struct LogicalType { BSON }; Type type; - cuda::std::optional decimal_type; - cuda::std::optional time_type; - cuda::std::optional timestamp_type; - cuda::std::optional int_type; + std::optional decimal_type; + std::optional time_type; + std::optional timestamp_type; + std::optional int_type; LogicalType(Type tp = UNDEFINED) : type(tp) {} LogicalType(DecimalType&& dt) : type(DECIMAL), decimal_type(dt) {} @@ -178,21 +176,21 @@ struct SchemaElement { // 5: nested fields int32_t num_children = 0; // 6: DEPRECATED: record the original type before conversion to parquet type - cuda::std::optional converted_type; + std::optional converted_type; // 7: DEPRECATED: record the scale for DECIMAL converted type int32_t decimal_scale = 0; // 8: DEPRECATED: record the precision for DECIMAL converted type int32_t decimal_precision = 0; // 9: save field_id from original schema - cuda::std::optional field_id; + std::optional field_id; // 10: replaces converted type - cuda::std::optional logical_type; + std::optional logical_type; // extra cudf specific fields bool output_as_byte_array = false; // cudf type determined from arrow:schema - cuda::std::optional arrow_type; + std::optional arrow_type; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -258,21 +256,21 @@ struct SchemaElement { */ struct Statistics { // deprecated max value in signed comparison order - cuda::std::optional> max; + std::optional> max; // deprecated min value in signed comparison order - cuda::std::optional> min; + std::optional> min; // count of null values in the column - cuda::std::optional null_count; + std::optional null_count; // count of distinct values occurring - cuda::std::optional distinct_count; + std::optional distinct_count; // max value for column determined by ColumnOrder - cuda::std::optional> max_value; + std::optional> max_value; // min value for column determined by ColumnOrder - cuda::std::optional> min_value; + std::optional> min_value; // If true, max_value is the actual maximum value for a column - cuda::std::optional is_max_value_exact; + std::optional is_max_value_exact; // If true, min_value is the actual minimum value for a column - cuda::std::optional is_min_value_exact; + std::optional is_min_value_exact; }; /** @@ -281,7 +279,7 @@ struct Statistics { struct SizeStatistics { // Number of variable-width bytes stored for the page/chunk. Should not be set for anything // but the BYTE_ARRAY physical type. - cuda::std::optional unencoded_byte_array_data_bytes; + std::optional unencoded_byte_array_data_bytes; /** * When present, there is expected to be one element corresponding to each * repetition (i.e. size=max repetition_level+1) where each element @@ -290,14 +288,14 @@ struct SizeStatistics { * * This value should not be written if max_repetition_level is 0. */ - cuda::std::optional> repetition_level_histogram; + std::optional> repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. * * This value should not be written if max_definition_level is 0 or 1. */ - cuda::std::optional> definition_level_histogram; + std::optional> definition_level_histogram; }; /** @@ -318,7 +316,7 @@ struct OffsetIndex { std::vector page_locations; // per-page size info. see description of the same field in SizeStatistics. only present for // columns with a BYTE_ARRAY physical type. - cuda::std::optional> unencoded_byte_array_data_bytes; + std::optional> unencoded_byte_array_data_bytes; }; /** @@ -329,11 +327,11 @@ struct ColumnIndex { std::vector> min_values; // lower bound for values in each page std::vector> max_values; // upper bound for values in each page BoundaryOrder boundary_order = - BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered - cuda::std::optional> null_counts; // Optional count of null values per page + BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered + std::optional> null_counts; // Optional count of null values per page // Repetition/definition level histograms for the column chunk - cuda::std::optional> repetition_level_histogram; - cuda::std::optional> definition_level_histogram; + std::optional> repetition_level_histogram; + std::optional> definition_level_histogram; }; /** @@ -383,11 +381,11 @@ struct ColumnChunkMetaData { Statistics statistics; // Set of all encodings used for pages in this column chunk. This information can be used to // determine if all data pages are dictionary encoded for example. - cuda::std::optional> encoding_stats; + std::optional> encoding_stats; // Optional statistics to help estimate total memory when converted to in-memory representations. // The histograms contained in these statistics can also be useful in some cases for more // fine-grained nullability/list length filter pushdown. - cuda::std::optional size_statistics; + std::optional size_statistics; }; /** @@ -429,13 +427,13 @@ struct RowGroup { int64_t num_rows = 0; // If set, specifies a sort ordering of the rows in this RowGroup. // The sorting columns can be a subset of all the columns. - cuda::std::optional> sorting_columns; + std::optional> sorting_columns; // Byte offset from beginning of file to first page (data or dictionary) in this row group - cuda::std::optional file_offset; + std::optional file_offset; // Total byte size of all compressed (and potentially encrypted) column data in this row group - cuda::std::optional total_compressed_size; + std::optional total_compressed_size; // Row group ordinal in the file - cuda::std::optional ordinal; + std::optional ordinal; }; /** @@ -460,7 +458,7 @@ struct FileMetaData { std::vector row_groups; std::vector key_value_metadata; std::string created_by = ""; - cuda::std::optional> column_orders; + std::optional> column_orders; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a8ba3a969ce..4f6d41a97da 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -395,7 +395,7 @@ struct ColumnChunkDesc { uint8_t def_level_bits_, uint8_t rep_level_bits_, Compression codec_, - cuda::std::optional logical_type_, + std::optional logical_type_, int32_t ts_clock_rate_, int32_t src_col_index_, int32_t src_col_schema_, @@ -441,12 +441,12 @@ struct ColumnChunkDesc { int32_t num_data_pages{}; // number of data pages int32_t num_dict_pages{}; // number of dictionary pages PageInfo const* dict_page{}; - string_index_pair* str_dict_index{}; // index for string dictionary - bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column - void** column_data_base{}; // base pointers of column data - void** column_string_base{}; // base pointers of column string data - Compression codec{}; // compressed codec enum - cuda::std::optional logical_type{}; // logical type + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + Compression codec{}; // compressed codec enum + std::optional logical_type{}; // logical type int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index{}; // my input column index diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index b90ca36c8c7..f0a0bc0b51b 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -152,7 +152,7 @@ struct stats_caster { } void set_index(size_type index, - cuda::std::optional> const& binary_value, + std::optional> const& binary_value, Type const type) { if (binary_value.has_value()) { @@ -234,8 +234,8 @@ struct stats_caster { max.set_index(stats_idx, max_value, colchunk.meta_data.type); } else { // Marking it null, if column present in row group - min.set_index(stats_idx, cuda::std::nullopt, {}); - max.set_index(stats_idx, cuda::std::nullopt, {}); + min.set_index(stats_idx, std::nullopt, {}); + max.set_index(stats_idx, std::nullopt, {}); } stats_idx++; } diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 1b69ccb7742..f0865c715bc 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -38,7 +38,7 @@ namespace { // be treated as a string. Currently the only logical type that has special handling is DECIMAL. // Other valid types in the future would be UUID (still treated as string) and FLOAT16 (which // for now would also be treated as a string). -inline bool is_treat_fixed_length_as_string(cuda::std::optional const& logical_type) +inline bool is_treat_fixed_length_as_string(std::optional const& logical_type) { if (!logical_type.has_value()) { return true; } return logical_type->type != LogicalType::DECIMAL; diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index c588fedb85c..27312a4da89 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -371,11 +371,11 @@ int64_t find_next_split(int64_t cur_pos, * * @return A tuple of Parquet clock rate and Parquet decimal type. */ -[[nodiscard]] std::tuple> conversion_info( +[[nodiscard]] std::tuple> conversion_info( type_id column_type_id, type_id timestamp_type_id, Type physical, - cuda::std::optional logical_type) + std::optional logical_type) { int32_t const clock_rate = is_chrono(data_type{column_type_id}) ? to_clockrate(timestamp_type_id) : 0; @@ -386,11 +386,11 @@ int64_t find_next_split(int64_t cur_pos, // if decimal but not outputting as float or decimal, then convert to no logical type if (column_type_id != type_id::FLOAT64 and not cudf::is_fixed_point(data_type{column_type_id})) { - return std::make_tuple(clock_rate, cuda::std::nullopt); + return {clock_rate, std::nullopt}; } } - return std::make_tuple(clock_rate, std::move(logical_type)); + return {clock_rate, std::move(logical_type)}; } /** diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 6d566b5815e..a6562d33de2 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -38,7 +38,7 @@ namespace flatbuf = cudf::io::parquet::flatbuf; namespace { -cuda::std::optional converted_to_logical_type(SchemaElement const& schema) +std::optional converted_to_logical_type(SchemaElement const& schema) { if (schema.converted_type.has_value()) { switch (schema.converted_type.value()) { @@ -66,7 +66,7 @@ cuda::std::optional converted_to_logical_type(SchemaElement const& default: return LogicalType{LogicalType::UNDEFINED}; } } - return cuda::std::nullopt; + return std::nullopt; } } // namespace @@ -246,7 +246,7 @@ void metadata::sanitize_schema() struct_elem.repetition_type = REQUIRED; struct_elem.num_children = schema_elem.num_children; struct_elem.type = UNDEFINED_TYPE; - struct_elem.converted_type = cuda::std::nullopt; + struct_elem.converted_type = std::nullopt; // swap children struct_elem.children_idx = std::move(schema_elem.children_idx); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index ec05f35d405..190f13eb688 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -186,7 +186,7 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - cuda::std::optional> column_orders = cuda::std::nullopt; + std::optional> column_orders = std::nullopt; }; namespace { @@ -472,7 +472,7 @@ struct leaf_schema_fn { std::enable_if_t, void> operator()() { col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = cuda::std::nullopt; + col_schema.converted_type = std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; if (timestamp_is_int96) { col_schema.ts_scale = -1000; // negative value indicates division by absolute value @@ -750,7 +750,7 @@ std::vector construct_parquet_schema_tree( col_schema.type = Type::BYTE_ARRAY; } - col_schema.converted_type = cuda::std::nullopt; + col_schema.converted_type = std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_byte_array; col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; col_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); @@ -2795,7 +2795,7 @@ std::unique_ptr> writer::merge_row_group_metadata( // See https://github.com/rapidsai/cudf/pull/14264#issuecomment-1778311615 for (auto& se : md.schema) { if (se.logical_type.has_value() && se.logical_type.value().type == LogicalType::UNKNOWN) { - se.logical_type = cuda::std::nullopt; + se.logical_type = std::nullopt; } } diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index 6141a40bc95..a1b8677eac8 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -744,7 +744,7 @@ int32_t compare(T& v1, T& v2) int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - cuda::std::optional const& ctype) + std::optional const& ctype) { auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); switch (ptype) { diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index bd1579eaa1b..c90b81ed27a 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -172,7 +172,7 @@ std::pair create_parquet_typed_with_stats(std::string int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - cuda::std::optional const& ctype); + std::optional const& ctype); void expect_compression_stats_empty(std::shared_ptr stats);