From c1d023e68426cfab971612b0b5e4729d41b570a8 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 28 Sep 2023 10:40:39 -0700 Subject: [PATCH] Fix the precision when converting decimal128 to arrow --- cpp/src/interop/to_arrow.cu | 3 ++- cpp/tests/interop/arrow_utils.hpp | 3 ++- cpp/tests/interop/from_arrow_test.cpp | 8 ++++---- cpp/tests/interop/to_arrow_test.cpp | 19 +++++++++++-------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 0cd750bc947..737266f5f12 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -220,6 +220,7 @@ std::shared_ptr dispatch_to_arrow::operator() rmm::cuda_stream_view stream) { using DeviceType = __int128_t; + auto constexpr max_precision = 38; rmm::device_uvector buf(input.size(), stream); @@ -234,7 +235,7 @@ std::shared_ptr dispatch_to_arrow::operator() CUDF_CUDA_TRY(cudaMemcpyAsync( data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - auto type = arrow::decimal(18, -input.type().scale()); + auto type = arrow::decimal(max_precision, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); auto buffers = std::vector>{mask, std::move(data_buffer)}; auto data = std::make_shared(type, input.size(), buffers); diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index fc8f5b37f7e..2e4eaa264a1 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -181,12 +181,13 @@ std::pair, std::shared_ptr> get_table template [[nodiscard]] auto make_decimal128_arrow_array(std::vector const& data, std::optional> const& validity, + int32_t max_precision, int32_t scale) -> std::shared_ptr { auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T); std::shared_ptr arr; - arrow::Decimal128Builder decimal_builder(arrow::decimal(18, -scale), + arrow::Decimal128Builder decimal_builder(arrow::decimal(max_precision, -scale), arrow::default_memory_pool()); for (T i = 0; i < static_cast(data.size() / BIT_WIDTH_RATIO); ++i) { diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index a898106a5b2..9bdd51583cb 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -360,7 +360,7 @@ TEST_F(FromArrowTest, FixedPoint128Table) auto const col = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(data, std::nullopt, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -384,7 +384,7 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge) auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(data, std::nullopt, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -408,7 +408,7 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls) fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = make_decimal128_arrow_array(data, validity, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -435,7 +435,7 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge) auto const expected = cudf::table_view({col}); auto const arr = make_decimal128_arrow_array( - data, std::vector(validity, validity + NUM_ELEMENTS), scale); + data, std::vector(validity, validity + NUM_ELEMENTS), 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 6bb4cdfd747..0ed67e935fc 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -355,6 +355,9 @@ TEST_F(ToArrowTest, StructColumn) template using fp_wrapper = cudf::test::fixed_point_column_wrapper; +auto constexpr decimal64_max_precision = 18; +auto constexpr decimal128_max_precision = 38; + TEST_F(ToArrowTest, FixedPoint64Table) { using namespace numeric; @@ -364,7 +367,7 @@ TEST_F(ToArrowTest, FixedPoint64Table) auto const input = cudf::table_view({col}); auto const expect_data = std::vector{-1, -1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal64_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -386,7 +389,7 @@ TEST_F(ToArrowTest, FixedPoint128Table) auto const input = cudf::table_view({col}); auto const expect_data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal128_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -415,7 +418,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const expect_data = std::vector{transform, transform + NUM_ELEMENTS * BIT_WIDTH_RATIO}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal64_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -439,7 +442,7 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge) auto const input = cudf::table_view({col}); auto const expect_data = std::vector<__int128_t>{iota, iota + NUM_ELEMENTS}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal128_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -463,7 +466,7 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple) fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = make_decimal128_arrow_array(data, validity, decimal64_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -487,7 +490,7 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple) fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = make_decimal128_arrow_array(data, validity, decimal128_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -513,7 +516,7 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls) std::vector{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0}; auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - auto arr = make_decimal128_arrow_array(expect_data, validity, scale); + auto arr = make_decimal128_arrow_array(expect_data, validity, decimal64_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -538,7 +541,7 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls) auto const expect_data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - auto arr = make_decimal128_arrow_array(expect_data, validity, scale); + auto arr = make_decimal128_arrow_array(expect_data, validity, decimal128_max_precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field});