From c95db42e20758fedfb89c28c74c0650e6136aced Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 19 Apr 2021 17:12:26 -0500 Subject: [PATCH] review changes --- cpp/src/io/orc/reader_impl.cu | 8 ++++++-- cpp/src/io/orc/stripe_data.cu | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index adcd155776a..d65c26fb81c 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -426,12 +426,16 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector column_types; for (const auto &col : _selected_columns) { auto col_type = to_type_id(_metadata->ff.types[col], _use_np_dtypes, _timestamp_type.id()); - auto scale = (col_type == type_id::DECIMAL64) ? _metadata->ff.types[col].scale : 0; CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); // Remove this once we support Decimal128 data type CUDF_EXPECTS((col_type != type_id::DECIMAL64) or (_metadata->ff.types[col].precision <= 18), "Decimal data has precision > 18, Decimal64 data type doesn't support it."); - column_types.emplace_back(col_type, -1 * static_cast(scale)); + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto scale = + (col_type == type_id::DECIMAL64) ? -static_cast(_metadata->ff.types[col].scale) : 0; + column_types.emplace_back(col_type, scale); // Map each ORC column to its column orc_col_map[col] = column_types.size() - 1; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index ee1fe7104b1..c77ed121abf 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -991,6 +991,8 @@ static const __device__ __constant__ int64_t kPow5i[28] = {1, * * @param[in] bs Input byte stream * @param[in,out] vals on input: scale from secondary stream, on output: value + * @param[in] val_scale Scale of each value + * @param[in] col_scale Scale from schema to which value will be adjusted * @param[in] numvals Number of values to decode * @param[in] t thread id * @@ -1027,7 +1029,10 @@ static __device__ int Decode_Decimals(orc_bytestream_s *bs, if (t >= num_vals_read and t < num_vals_to_read) { auto const pos = static_cast(vals.i64[t]); int128_s v = decode_varint128(bs, pos); - int32_t scale = (t < numvals) ? col_scale - val_scale : 0; + // Since cuDF column stores just one scale, value needs to + // be adjusted to col_scale from val_scale. So the difference + // of them will be used to add 0s or remove digits. + int32_t scale = (t < numvals) ? col_scale - val_scale : 0; if (scale >= 0) { scale = min(scale, 27); vals.i64[t] = ((int64_t)v.lo * kPow5i[scale]) << scale;