From 3d44ed58ca95f4538b059fe4ac7f9cf17b6b4867 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Sun, 8 Nov 2020 16:50:40 -0500 Subject: [PATCH] Implement `cudf::round` `decimal32` & `decimal64` (`HALF_UP` and `HALF_EVEN`) (#6685) This PR is the final of 3 PRs and resolves #3790. Implement cudf::round floating point and integer types (HALF_UP) #6562 Implement cudf::round floating point and integer types (HALF_EVEN) #6647 Implement cudf::round decimal32 & decimal64 (HALF_UP and HALF_EVEN) :arrow_backward: The PR to-do list is as follows: Add code for adjusted type of resulting column Add implementation for half_up positive Add implementation for half_up zero Add implementation for half_up negative Add implementation for half_even positive Add implementation for half_even zero Add implementation for half_even negative Update documentation Add basic unit tests Add comprehensive unit tests Refactor leveraging the fact that EVERY fixed_point function object is identical :fire: Co-authored-by: Mark Harris --- CHANGELOG.md | 1 + cpp/include/cudf/round.hpp | 5 +- cpp/src/round/round.cu | 95 ++++++++++++--- cpp/tests/round/round_tests.cpp | 203 +++++++++++++++++++++++++++++++- 4 files changed, 281 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63d5e67cd8b..69485e2394e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - PR #6460 Add is_timestamp format check API - PR #6647 Implement `cudf::round` floating point and integer types (`HALF_EVEN`) - PR #6562 Implement `cudf::round` floating point and integer types (`HALF_UP`) +- PR #6685 Implement `cudf::round` `decimal32` & `decimal64` (`HALF_UP` and `HALF_EVEN`) - PR #6528 Enable `fixed_point` binary operations - PR #6460 Add is_timestamp format check API - PR #6568 Add function to create hashed vocabulary file from raw vocabulary diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index d5128b024c7..0a48e1d1fc8 100644 --- 
a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -38,8 +38,9 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN }; /** * @brief Rounds all the values in a column to the specified number of decimal places. * - * `cudf::round` currently supports HALF_UP and HALF_EVEN rounding for integer and floating point - * numbers. + * `cudf::round` currently supports HALF_UP and HALF_EVEN rounding for integer, floating point and + * `decimal32` and `decimal64` numbers. For `decimal32` and `decimal64` numbers, negated + * `numeric::scale` is equivalent to `decimal_places`. * * Example: * ``` diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index cede699018c..308a8c6d5f5 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -14,12 +14,15 @@ * limitations under the License. */ +#include #include #include #include #include +#include #include #include +#include #include #include #include @@ -67,7 +70,7 @@ int16_t __device__ generic_sign(T value) template constexpr inline auto is_supported_round_type() { - return cudf::is_numeric() && not std::is_same::value; + return (cudf::is_numeric() && not std::is_same::value) || cudf::is_fixed_point(); } template @@ -179,7 +182,22 @@ struct half_even_negative { } }; -template typename RoundFunctor> +template +struct half_up_fixed_point { + T n; + __device__ T operator()(T e) { return half_up_negative{n}(e) / n; } +}; + +template +struct half_even_fixed_point { + T n; + __device__ T operator()(T e) { return half_even_negative{n}(e) / n; } +}; + +template + typename RoundFunctor, + typename std::enable_if_t()>* = nullptr> std::unique_ptr round_with(column_view const& input, int32_t decimal_places, cudaStream_t stream, @@ -190,12 +208,8 @@ std::unique_ptr round_with(column_view const& input, if (decimal_places >= 0 && std::is_integral::value) return std::make_unique(input, stream, mr); - auto result = cudf::make_fixed_width_column(input.type(), // - input.size(), - copy_bitmask(input, stream, mr), 
- input.null_count(), - stream, - mr); + auto result = cudf::make_fixed_width_column( + input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); auto out_view = result->mutable_view(); T const n = std::pow(10, std::abs(decimal_places)); @@ -209,6 +223,45 @@ std::unique_ptr round_with(column_view const& input, return result; } +template + typename RoundFunctor, + typename std::enable_if_t()>* = nullptr> +std::unique_ptr round_with(column_view const& input, + int32_t decimal_places, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + using namespace numeric; + using Type = device_storage_type_t; + using FixedPointRoundFunctor = RoundFunctor; + + // if rounding to more precision than fixed_point is capable of, just need to rescale + // note: decimal_places has the opposite sign of numeric::scale_type (therefore have to negate) + if (input.type().scale() > -decimal_places) { + // TODO replace this cudf::binary_operation with a cudf::cast or cudf::rescale when available + auto const diff = input.type().scale() - (-decimal_places); + auto const scalar = cudf::make_fixed_point_scalar(std::pow(10, diff), scale_type{-diff}); + return cudf::binary_operation(input, *scalar, cudf::binary_operator::MUL, {}, mr); + } + + auto const result_type = data_type{input.type().id(), scale_type{-decimal_places}}; + + auto result = cudf::make_fixed_width_column( + result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); + + auto out_view = result->mutable_view(); + Type const n = std::pow(10, std::abs(decimal_places + input.type().scale())); + + thrust::transform(rmm::exec_policy(stream)->on(stream), + input.begin(), + input.end(), + out_view.begin(), + FixedPointRoundFunctor{n}); + + return result; +} + struct round_type_dispatcher { template std::enable_if_t(), std::unique_ptr> operator()( @@ -228,13 +281,15 @@ struct round_type_dispatcher { // clang-format off switch (method) { case 
cudf::rounding_method::HALF_UP: - if (decimal_places == 0) return round_with(input, decimal_places, stream, mr); - else if (decimal_places > 0) return round_with(input, decimal_places, stream, mr); - else return round_with(input, decimal_places, stream, mr); + if (is_fixed_point()) return round_with(input, decimal_places, stream, mr); + else if (decimal_places == 0) return round_with(input, decimal_places, stream, mr); + else if (decimal_places > 0) return round_with(input, decimal_places, stream, mr); + else return round_with(input, decimal_places, stream, mr); case cudf::rounding_method::HALF_EVEN: - if (decimal_places == 0) return round_with(input, decimal_places, stream, mr); - else if (decimal_places > 0) return round_with(input, decimal_places, stream, mr); - else return round_with(input, decimal_places, stream, mr); + if (is_fixed_point()) return round_with(input, decimal_places, stream, mr); + else if (decimal_places == 0) return round_with(input, decimal_places, stream, mr); + else if (decimal_places > 0) return round_with(input, decimal_places, stream, mr); + else return round_with(input, decimal_places, stream, mr); default: CUDF_FAIL("Undefined rounding method"); } // clang-format on @@ -249,10 +304,16 @@ std::unique_ptr round(column_view const& input, cudaStream_t stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::is_numeric(input.type()), "Only integral/floating point currently supported."); + CUDF_EXPECTS(cudf::is_numeric(input.type()) || cudf::is_fixed_point(input.type()), + "Only integral/floating point/fixed point currently supported."); - // TODO when fixed_point supported, have to adjust type - if (input.size() == 0) return empty_like(input); + if (input.is_empty()) { + if (is_fixed_point(input.type())) { + auto const type = data_type{input.type().id(), numeric::scale_type{-decimal_places}}; + return std::make_unique(type, 0, rmm::device_buffer{}); + } + return empty_like(input); + } return type_dispatcher( input.type(), 
round_type_dispatcher{}, input, decimal_places, method, stream, mr); diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index e7a429e637c..242390ca59b 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -42,6 +42,34 @@ TYPED_TEST_CASE(RoundTestsIntegerTypes, IntegerTypes); TYPED_TEST_CASE(RoundTestsFixedPointTypes, cudf::test::FixedPointTypes); TYPED_TEST_CASE(RoundTestsFloatingPointTypes, cudf::test::FloatingPointTypes); +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUpZero) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-2}}; + auto const expected = fp_wrapper{{11, 12, 12, 12, 13, 13}, scale_type{0}}; + auto const result = cudf::round(input); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEvenZero) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-2}}; + auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{0}}; + auto const result = cudf::round(input, 0, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp) { using namespace numeric; @@ -51,12 +79,179 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp) auto const input = fp_wrapper{{1140, 1150, 1160}, scale_type{-3}}; auto const expected = fp_wrapper{{11, 12, 12}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, 
result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{114, 115, 116}, scale_type{-2}}; + auto const expected = fp_wrapper{{11, 12, 12}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} - EXPECT_THROW(cudf::round(input, 1, cudf::rounding_method::HALF_UP), cudf::logic_error); +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1, 2, 3}, scale_type{1}}; + auto const expected = fp_wrapper{{100, 200, 300}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-3}}; + auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{114, 115, 116, 124, 125, 126}, 
scale_type{-2}}; + auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; - // enable in follow up PR - // auto const result = cudf::round(col, 1, cudf::rounding_method::HALF_UP); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + auto const input = fp_wrapper{{1, 2, 3}, scale_type{1}}; + auto const expected = fp_wrapper{{100, 200, 300}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, EmptyFixedPointTypeTest) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{}, scale_type{1}}; + auto const expected = fp_wrapper{{}, scale_type{-1}}; + auto const expected_type = cudf::data_type{cudf::type_to_id(), scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + EXPECT_EQ(result->view().type(), expected_type); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{2}}; + auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3}, scale_type{3}}; + auto const result = cudf::round(input, -3, 
cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{3}}; + auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3}, scale_type{4}}; + auto const result = cudf::round(input, -4, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegUp3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1, 2, 3}, scale_type{2}}; + auto const expected = fp_wrapper{{10, 20, 30}, scale_type{1}}; + auto const result = cudf::round(input, -1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{2}}; + auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3}, scale_type{3}}; + auto const result = cudf::round(input, -3, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{3}}; + 
auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3}, scale_type{4}}; + auto const result = cudf::round(input, -4, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegEven3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{1, 2, 3}, scale_type{2}}; + auto const expected = fp_wrapper{{10, 20, 30}, scale_type{1}}; + auto const result = cudf::round(input, -1, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } TYPED_TEST(RoundTestsFloatingPointTypes, SimpleFloatingPointTestHalfUp0)