From 67f8d700a0e0036540d8d3f7a308c7b4bc106ae1 Mon Sep 17 00:00:00 2001 From: vuule Date: Tue, 25 Jan 2022 16:42:35 -0800 Subject: [PATCH 1/3] remove the option to disable decimal128 in the ORC reader --- cpp/include/cudf/io/orc.hpp | 24 ------------------------ cpp/src/io/orc/reader_impl.cu | 11 +++-------- cpp/src/io/orc/reader_impl.hpp | 1 - python/cudf/cudf/_lib/cpp/io/orc.pxd | 1 - 4 files changed, 3 insertions(+), 34 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 108251dd646..29fc36ce121 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -72,7 +72,6 @@ class orc_reader_options { // Columns that should be read as Decimal128 std::vector _decimal128_columns; - bool _enable_decimal128 = true; friend orc_reader_options_builder; @@ -152,11 +151,6 @@ class orc_reader_options { */ std::vector const& get_decimal128_columns() const { return _decimal128_columns; } - /** - * @brief Whether to use row index to speed-up reading. - */ - bool is_enabled_decimal128() const { return _enable_decimal128; } - // Setters /** @@ -231,13 +225,6 @@ class orc_reader_options { _decimal_cols_as_float = std::move(val); } - /** - * @brief Enable/Disable the use of decimal128 type - * - * @param use Boolean value to enable/disable. - */ - void enable_decimal128(bool use) { _enable_decimal128 = use; } - /** * @brief Set columns that should be read as 128-bit Decimal * @@ -375,17 +362,6 @@ class orc_reader_options_builder { return *this; } - /** - * @brief Enable/Disable use of decimal128 type - * - * @param use Boolean value to enable/disable. - */ - orc_reader_options_builder& decimal128(bool use) - { - options.enable_decimal128(use); - return *this; - } - /** * @brief move orc_reader_options member once it's built. */ diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 817b9fd7b01..a664704829f 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -232,7 +232,6 @@ size_t gather_stream_info(const size_t stripe_index, */ auto decimal_column_type(std::vector const& float64_columns, std::vector const& decimal128_columns, - bool is_decimal128_enabled, cudf::io::orc::detail::aggregate_orc_metadata const& metadata, int column_index) { @@ -244,7 +243,7 @@ auto decimal_column_type(std::vector const& float64_columns, }; auto const user_selected_float64 = is_column_in(float64_columns); - auto const user_selected_decimal128 = is_decimal128_enabled and is_column_in(decimal128_columns); + auto const user_selected_decimal128 = is_column_in(decimal128_columns); CUDF_EXPECTS(not user_selected_float64 or not user_selected_decimal128, "Both decimal128 and float64 types selected for column " + column_path); @@ -255,9 +254,6 @@ auto decimal_column_type(std::vector const& float64_columns, .precision.value_or(cuda::std::numeric_limits::digits10); if (precision <= cuda::std::numeric_limits::digits10) return type_id::DECIMAL32; if (precision <= cuda::std::numeric_limits::digits10) return type_id::DECIMAL64; - CUDF_EXPECTS(is_decimal128_enabled, - "Decimal precision too high for decimal64, use `decimal_cols_as_float` or enable " - "decimal128 use"); return type_id::DECIMAL128; } @@ -755,7 +751,7 @@ std::unique_ptr reader::impl::create_empty_column(const size_type orc_co _use_np_dtypes, _timestamp_type.id(), decimal_column_type( - _decimal_cols_as_float, decimal128_columns, is_decimal128_enabled, _metadata, orc_col_id)); + _decimal_cols_as_float, decimal128_columns, _metadata, orc_col_id)); int32_t scale = 0; std::vector> child_columns; std::unique_ptr out_col = nullptr; @@ -900,7 +896,6 @@ reader::impl::impl(std::vector>&& sources, // Control decimals conversion _decimal_cols_as_float = options.get_decimal_cols_as_float(); decimal128_columns = options.get_decimal128_columns(); - is_decimal128_enabled = options.is_enabled_decimal128(); } timezone_table reader::impl::compute_timezone_table( @@ -965,7 +960,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, _use_np_dtypes, _timestamp_type.id(), decimal_column_type( - _decimal_cols_as_float, decimal128_columns, is_decimal128_enabled, _metadata, col.id)); + _decimal_cols_as_float, decimal128_columns, _metadata, col.id)); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or col_type == type_id::DECIMAL128) { diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index e8aa298012b..1e586bcde00 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -223,7 +223,6 @@ class reader::impl { bool _use_np_dtypes{true}; std::vector _decimal_cols_as_float; std::vector decimal128_columns; - bool is_decimal128_enabled{true}; data_type _timestamp_type{type_id::EMPTY}; reader_column_meta _col_meta{}; }; diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd index e5a8bb926c1..430a592bff2 100644 --- a/python/cudf/cudf/_lib/cpp/io/orc.pxd +++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd @@ -37,7 +37,6 @@ cdef extern from "cudf/io/orc.hpp" \ void enable_use_np_dtypes(bool val) except+ void set_timestamp_type(data_type type) except+ void set_decimal_cols_as_float(vector[string] val) except+ - void enable_decimal128(bool val) except+ @staticmethod orc_reader_options_builder builder( From 385984a67991ff481905950abfc5ff0fb33c6011 Mon Sep 17 00:00:00 2001 From: vuule Date: Tue, 25 Jan 2022 16:56:08 -0800 Subject: [PATCH 2/3] style --- cpp/src/io/orc/reader_impl.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index a664704829f..f133b79a27e 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -750,8 +750,7 @@ std::unique_ptr reader::impl::create_empty_column(const size_type orc_co _metadata.get_schema(orc_col_id), _use_np_dtypes, _timestamp_type.id(), - decimal_column_type( - _decimal_cols_as_float, decimal128_columns, _metadata, orc_col_id)); + decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, orc_col_id)); int32_t scale = 0; std::vector> child_columns; std::unique_ptr out_col = nullptr; @@ -959,8 +958,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, _metadata.get_col_type(col.id), _use_np_dtypes, _timestamp_type.id(), - decimal_column_type( - _decimal_cols_as_float, decimal128_columns, _metadata, col.id)); + decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, col.id)); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or col_type == type_id::DECIMAL128) { From 8bb57f64bfb5abb684ca965bd13e32fd30f639ba Mon Sep 17 00:00:00 2001 From: vuule Date: Wed, 26 Jan 2022 09:27:25 -0800 Subject: [PATCH 3/3] remove missed declaration --- python/cudf/cudf/_lib/cpp/io/orc.pxd | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd index 430a592bff2..0c2f971a26c 100644 --- a/python/cudf/cudf/_lib/cpp/io/orc.pxd +++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd @@ -58,7 +58,6 @@ cdef extern from "cudf/io/orc.hpp" \ orc_reader_options_builder& decimal_cols_as_float( vector[string] val ) except+ - orc_reader_options_builder& decimal128(bool val) except+ orc_reader_options build() except+