Skip to content

Commit

Permalink
Fix the precision when converting decimal128 to arrow
Browse files: browse the repository at this point in the history
  • Loading branch information
jihoonson committed Sep 28, 2023
1 parent 7825790 commit c1d023e
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 14 deletions.
3 changes: 2 additions & 1 deletion cpp/src/interop/to_arrow.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -220,6 +220,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal128>
rmm::cuda_stream_view stream)
{
using DeviceType = __int128_t;
auto constexpr max_precision = 38;

rmm::device_uvector<DeviceType> buf(input.size(), stream);

Expand All @@ -234,7 +235,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal128>
CUDF_CUDA_TRY(cudaMemcpyAsync(
data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value()));

auto type = arrow::decimal(18, -input.type().scale());
auto type = arrow::decimal(max_precision, -input.type().scale());
auto mask = fetch_mask_buffer(input, ar_mr, stream);
auto buffers = std::vector<std::shared_ptr<arrow::Buffer>>{mask, std::move(data_buffer)};
auto data = std::make_shared<arrow::ArrayData>(type, input.size(), buffers);
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/interop/arrow_utils.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -181,12 +181,13 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
template <typename T>
[[nodiscard]] auto make_decimal128_arrow_array(std::vector<T> const& data,
std::optional<std::vector<int>> const& validity,
int32_t max_precision,
int32_t scale) -> std::shared_ptr<arrow::Array>
{
auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T);

std::shared_ptr<arrow::Array> arr;
arrow::Decimal128Builder decimal_builder(arrow::decimal(18, -scale),
arrow::Decimal128Builder decimal_builder(arrow::decimal(max_precision, -scale),
arrow::default_memory_pool());

for (T i = 0; i < static_cast<T>(data.size() / BIT_WIDTH_RATIO); ++i) {
Expand Down
8 changes: 4 additions & 4 deletions cpp/tests/interop/from_arrow_test.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -360,7 +360,7 @@ TEST_F(FromArrowTest, FixedPoint128Table)
auto const col = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale});
auto const expected = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(data, std::nullopt, 38, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -384,7 +384,7 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge)
auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
auto const expected = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(data, std::nullopt, 38, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -408,7 +408,7 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls)
fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
auto const expected = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(data, validity, scale);
auto const arr = make_decimal128_arrow_array(data, validity, 38, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -435,7 +435,7 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge)
auto const expected = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(
data, std::vector<int32_t>(validity, validity + NUM_ELEMENTS), scale);
data, std::vector<int32_t>(validity, validity + NUM_ELEMENTS), 38, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand Down
19 changes: 11 additions & 8 deletions cpp/tests/interop/to_arrow_test.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -355,6 +355,9 @@ TEST_F(ToArrowTest, StructColumn)
// Shorthand for the cudf test column wrapper that the tests in this file use to
// build fixed-point input columns with representation type T.
template <typename T>
using fp_wrapper = cudf::test::fixed_point_column_wrapper<T>;

// Precision values passed to make_decimal128_arrow_array when building the
// expected Arrow arrays: the FixedPoint64* tests use the first constant and the
// FixedPoint128* tests use the second.
// NOTE(review): 18 and 38 are presumably the largest decimal digit counts that
// fit a 64-bit and a 128-bit representation respectively — confirm against the
// precision produced by the to_arrow conversion for each type.
auto constexpr decimal64_max_precision = 18;
auto constexpr decimal128_max_precision = 38;

TEST_F(ToArrowTest, FixedPoint64Table)
{
using namespace numeric;
Expand All @@ -364,7 +367,7 @@ TEST_F(ToArrowTest, FixedPoint64Table)
auto const input = cudf::table_view({col});
auto const expect_data = std::vector<int64_t>{-1, -1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0};

auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal64_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -386,7 +389,7 @@ TEST_F(ToArrowTest, FixedPoint128Table)
auto const input = cudf::table_view({col});
auto const expect_data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6};

auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal128_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand Down Expand Up @@ -415,7 +418,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge)
auto const expect_data =
std::vector<int64_t>{transform, transform + NUM_ELEMENTS * BIT_WIDTH_RATIO};

auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal64_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -439,7 +442,7 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge)
auto const input = cudf::table_view({col});
auto const expect_data = std::vector<__int128_t>{iota, iota + NUM_ELEMENTS};

auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, decimal128_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -463,7 +466,7 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple)
fp_wrapper<int64_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
auto const input = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(data, validity, scale);
auto const arr = make_decimal128_arrow_array(data, validity, decimal64_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -487,7 +490,7 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple)
fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
auto const input = cudf::table_view({col});

auto const arr = make_decimal128_arrow_array(data, validity, scale);
auto const arr = make_decimal128_arrow_array(data, validity, decimal128_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -513,7 +516,7 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls)
std::vector<int64_t>{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0};
auto const validity = std::vector<int32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};

auto arr = make_decimal128_arrow_array(expect_data, validity, scale);
auto arr = make_decimal128_arrow_array(expect_data, validity, decimal64_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand All @@ -538,7 +541,7 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls)
auto const expect_data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
auto const validity = std::vector<int32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};

auto arr = make_decimal128_arrow_array(expect_data, validity, scale);
auto arr = make_decimal128_arrow_array(expect_data, validity, decimal128_max_precision, scale);

auto const field = arrow::field("a", arr->type());
auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
Expand Down

0 comments on commit c1d023e

Please sign in to comment.