Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Implement cudf::round decimal32 & decimal64 (HALF_UP and HALF_EVEN) #6685

Merged
merged 17 commits into from
Nov 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- PR #6460 Add is_timestamp format check API
- PR #6647 Implement `cudf::round` floating point and integer types (`HALF_EVEN`)
- PR #6562 Implement `cudf::round` floating point and integer types (`HALF_UP`)
- PR #6685 Implement `cudf::round` `decimal32` & `decimal64` (`HALF_UP` and `HALF_EVEN`)
- PR #6528 Enable `fixed_point` binary operations
- PR #6460 Add is_timestamp format check API
- PR #6568 Add function to create hashed vocabulary file from raw vocabulary
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/round.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN };
/**
* @brief Rounds all the values in a column to the specified number of decimal places.
*
* `cudf::round` currently supports HALF_UP and HALF_EVEN rounding for integer and floating point
* numbers.
* `cudf::round` currently supports HALF_UP and HALF_EVEN rounding for integer, floating point,
* `decimal32` and `decimal64` numbers. For `decimal32` and `decimal64` columns, `decimal_places`
* is the negation of the `numeric::scale` of the rounded result.
*
* Example:
* ```
Expand Down
95 changes: 78 additions & 17 deletions cpp/src/round/round.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
* limitations under the License.
*/

#include <cudf/binaryop.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/round.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/round.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
Expand Down Expand Up @@ -67,7 +70,7 @@ int16_t __device__ generic_sign(T value)
/**
 * @brief Compile-time predicate telling whether `cudf::round` can process elements of type `T`.
 *
 * Yields true for every numeric type other than `bool`, and for fixed-point types.
 * Only a single return statement is kept: an earlier numeric-only `return` placed in front of
 * this one would make the fixed-point clause unreachable.
 */
template <typename T>
constexpr inline auto is_supported_round_type()
{
  return (cudf::is_numeric<T>() && not std::is_same<T, bool>::value) || cudf::is_fixed_point<T>();
}

template <typename T>
Expand Down Expand Up @@ -179,7 +182,22 @@ struct half_even_negative {
}
};

template <typename T, template <typename> typename RoundFunctor>
/**
 * @brief Device functor used for HALF_UP rounding of a fixed-point representation value.
 *
 * Applies `half_up_negative<T>` constructed from `n` to the element and then divides the
 * outcome by `n`.
 */
template <typename T>
struct half_up_fixed_point {
  T n;  // divisor handed to `half_up_negative` and applied again to its result

  __device__ T operator()(T e)
  {
    half_up_negative<T> rounder{n};
    auto const rounded = rounder(e);
    return rounded / n;
  }
};

/**
 * @brief Device functor used for HALF_EVEN rounding of a fixed-point representation value.
 *
 * Applies `half_even_negative<T>` constructed from `n` to the element and then divides the
 * outcome by `n`.
 */
template <typename T>
struct half_even_fixed_point {
  T n;  // divisor handed to `half_even_negative` and applied again to its result

  __device__ T operator()(T e)
  {
    half_even_negative<T> rounder{n};
    auto const rounded = rounder(e);
    return rounded / n;
  }
};

template <typename T,
template <typename>
typename RoundFunctor,
typename std::enable_if_t<not cudf::is_fixed_point<T>()>* = nullptr>
std::unique_ptr<column> round_with(column_view const& input,
int32_t decimal_places,
cudaStream_t stream,
Expand All @@ -190,12 +208,8 @@ std::unique_ptr<column> round_with(column_view const& input,
if (decimal_places >= 0 && std::is_integral<T>::value)
return std::make_unique<cudf::column>(input, stream, mr);

auto result = cudf::make_fixed_width_column(input.type(), //
input.size(),
copy_bitmask(input, stream, mr),
input.null_count(),
stream,
mr);
auto result = cudf::make_fixed_width_column(
input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);

auto out_view = result->mutable_view();
T const n = std::pow(10, std::abs(decimal_places));
Expand All @@ -209,6 +223,45 @@ std::unique_ptr<column> round_with(column_view const& input,
return result;
}

/**
 * @brief Rounds a fixed-point column to `decimal_places` using `RoundFunctor`.
 *
 * Two paths exist:
 *  - If the input scale is greater than `-decimal_places` (the caller asks for more precision
 *    than the column stores), no rounding is performed; the column is only rescaled by
 *    multiplying it with a fixed-point scalar through `cudf::binary_operation`.
 *  - Otherwise a new column of the same type id with scale `-decimal_places` is allocated, the
 *    input null mask is copied, and `RoundFunctor` is applied to every underlying
 *    representation value.
 *
 * @tparam T             Fixed-point element type (overload is enabled only for fixed-point types)
 * @tparam RoundFunctor  Functor template instantiated with the device storage type of `T` and
 *                       constructed from the power-of-ten divisor
 * @param input          Column to round
 * @param decimal_places Number of decimal places to round to (opposite sign of the scale)
 * @param stream         CUDA stream used for allocation, mask copy and the transform
 * @param mr             Device memory resource used for the returned column
 * @return The rounded (or rescaled) column
 */
template <typename T,
          template <typename>
          typename RoundFunctor,
          typename std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
std::unique_ptr<column> round_with(column_view const& input,
                                   int32_t decimal_places,
                                   cudaStream_t stream,
                                   rmm::mr::device_memory_resource* mr)
{
  using namespace numeric;
  using Type                   = device_storage_type_t<T>;
  using FixedPointRoundFunctor = RoundFunctor<Type>;

  // if rounding to more precision than fixed_point is capable of, just need to rescale
  // note: decimal_places has the opposite sign of numeric::scale_type (therefore have to negate)
  if (input.type().scale() > -decimal_places) {
    // TODO replace this cudf::binary_operation with a cudf::cast or cudf::rescale when available
    // `diff` is the (positive) number of extra digits requested; the scalar is built from
    // 10^diff with scale -diff.
    auto const diff   = input.type().scale() - (-decimal_places);
    auto const scalar = cudf::make_fixed_point_scalar<T>(std::pow(10, diff), scale_type{-diff});
    return cudf::binary_operation(input, *scalar, cudf::binary_operator::MUL, {}, mr);
  }

  // Same type id as the input, but carrying the requested scale.
  auto const result_type = data_type{input.type().id(), scale_type{-decimal_places}};

  auto result = cudf::make_fixed_width_column(
    result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);

  auto out_view = result->mutable_view();
  // Power of ten covering the digits dropped when moving from the input scale to the new scale.
  Type const n = std::pow(10, std::abs(decimal_places + input.type().scale()));

  thrust::transform(rmm::exec_policy(stream)->on(stream),
                    input.begin<Type>(),
                    input.end<Type>(),
                    out_view.begin<Type>(),
                    FixedPointRoundFunctor{n});

  return result;
}

struct round_type_dispatcher {
template <typename T, typename... Args>
std::enable_if_t<not is_supported_round_type<T>(), std::unique_ptr<column>> operator()(
Expand All @@ -228,13 +281,15 @@ struct round_type_dispatcher {
// clang-format off
switch (method) {
case cudf::rounding_method::HALF_UP:
if (decimal_places == 0) return round_with<T, half_up_zero >(input, decimal_places, stream, mr);
else if (decimal_places > 0) return round_with<T, half_up_positive>(input, decimal_places, stream, mr);
else return round_with<T, half_up_negative>(input, decimal_places, stream, mr);
if (is_fixed_point<T>()) return round_with<T, half_up_fixed_point>(input, decimal_places, stream, mr);
else if (decimal_places == 0) return round_with<T, half_up_zero >(input, decimal_places, stream, mr);
else if (decimal_places > 0) return round_with<T, half_up_positive >(input, decimal_places, stream, mr);
else return round_with<T, half_up_negative >(input, decimal_places, stream, mr);
case cudf::rounding_method::HALF_EVEN:
if (decimal_places == 0) return round_with<T, half_even_zero >(input, decimal_places, stream, mr);
else if (decimal_places > 0) return round_with<T, half_even_positive>(input, decimal_places, stream, mr);
else return round_with<T, half_even_negative>(input, decimal_places, stream, mr);
if (is_fixed_point<T>()) return round_with<T, half_even_fixed_point>(input, decimal_places, stream, mr);
else if (decimal_places == 0) return round_with<T, half_even_zero >(input, decimal_places, stream, mr);
else if (decimal_places > 0) return round_with<T, half_even_positive >(input, decimal_places, stream, mr);
else return round_with<T, half_even_negative >(input, decimal_places, stream, mr);
default: CUDF_FAIL("Undefined rounding method");
}
// clang-format on
Expand All @@ -249,10 +304,16 @@ std::unique_ptr<column> round(column_view const& input,
cudaStream_t stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(cudf::is_numeric(input.type()), "Only integral/floating point currently supported.");
CUDF_EXPECTS(cudf::is_numeric(input.type()) || cudf::is_fixed_point(input.type()),
"Only integral/floating point/fixed point currently supported.");

// TODO when fixed_point supported, have to adjust type
if (input.size() == 0) return empty_like(input);
if (input.is_empty()) {
if (is_fixed_point(input.type())) {
auto const type = data_type{input.type().id(), numeric::scale_type{-decimal_places}};
return std::make_unique<cudf::column>(type, 0, rmm::device_buffer{});
}
return empty_like(input);
}

return type_dispatcher(
input.type(), round_type_dispatcher{}, input, decimal_places, method, stream, mr);
Expand Down
203 changes: 199 additions & 4 deletions cpp/tests/round/round_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,34 @@ TYPED_TEST_CASE(RoundTestsIntegerTypes, IntegerTypes);
TYPED_TEST_CASE(RoundTestsFixedPointTypes, cudf::test::FixedPointTypes);
TYPED_TEST_CASE(RoundTestsFloatingPointTypes, cudf::test::FloatingPointTypes);

// cudf::round called with its default arguments on scale -2 values; the expected column holds
// the values rounded half-up to whole numbers (scale 0), e.g. 11.50 -> 12 and 12.50 -> 13.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUpZero)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1140, 1150, 1160, 1240, 1250, 1260}, numeric::scale_type{-2}};
  auto const want = column_t{{11, 12, 12, 12, 13, 13}, numeric::scale_type{0}};

  auto const rounded = cudf::round(col);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN to zero decimal places on scale -2 values: ties go to the even neighbour,
// so 11.50 -> 12 and 12.50 -> 12.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEvenZero)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1140, 1150, 1160, 1240, 1250, 1260}, numeric::scale_type{-2}};
  auto const want = column_t{{11, 12, 12, 12, 12, 13}, numeric::scale_type{0}};

  auto const rounded = cudf::round(col, 0, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp)
{
using namespace numeric;
Expand All @@ -51,12 +79,179 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp)

auto const input = fp_wrapper{{1140, 1150, 1160}, scale_type{-3}};
auto const expected = fp_wrapper{{11, 12, 12}, scale_type{-1}};
auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
}

// HALF_UP to one decimal place on scale -2 values: 1.14, 1.15, 1.16 -> 1.1, 1.2, 1.2.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp2)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{114, 115, 116}, numeric::scale_type{-2}};
  auto const want = column_t{{11, 12, 12}, numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

EXPECT_THROW(cudf::round(input, 1, cudf::rounding_method::HALF_UP), cudf::logic_error);
// Requesting more precision than the input holds (scale 1 -> one decimal place): values are
// unchanged numerically and only re-expressed at scale -1 (10, 20, 30 -> 100, 200, 300 reps).
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp3)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1, 2, 3}, numeric::scale_type{1}};
  auto const want = column_t{{100, 200, 300}, numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN to one decimal place on scale -3 values: 1.150 -> 1.2 and 1.250 -> 1.2.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1140, 1150, 1160, 1240, 1250, 1260}, numeric::scale_type{-3}};
  auto const want = column_t{{11, 12, 12, 12, 12, 13}, numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN to one decimal place on scale -2 values: 1.15 -> 1.2 and 1.25 -> 1.2.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven2)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{114, 115, 116, 124, 125, 126}, numeric::scale_type{-2}};
  auto const want = column_t{{11, 12, 12, 12, 12, 13}, numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN with more precision requested than the input holds (scale 1 -> one decimal place):
// the values are only re-expressed at scale -1 (reps 1, 2, 3 -> 100, 200, 300).
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven3)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1, 2, 3}, numeric::scale_type{1}};
  auto const want = column_t{{100, 200, 300}, numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// Rounding an empty fixed-point column must still yield a column whose type carries the
// requested scale (-1 for one decimal place).
TYPED_TEST(RoundTestsFixedPointTypes, EmptyFixedPointTypeTest)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col       = column_t{{}, numeric::scale_type{1}};
  auto const want      = column_t{{}, numeric::scale_type{-1}};
  auto const want_type = cudf::data_type{cudf::type_to_id<TypeParam>(), numeric::scale_type{-1}};

  auto const rounded = cudf::round(col, 1, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
  EXPECT_EQ(rounded->view().type(), want_type);
}

// HALF_UP with negative decimal_places (-3) on scale 2 values: 1500 -> 2000 and 2500 -> 3000.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{14, 15, 16, 24, 25, 26}, numeric::scale_type{2}};
  auto const want = column_t{{1, 2, 2, 2, 3, 3}, numeric::scale_type{3}};

  auto const rounded = cudf::round(col, -3, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_UP with decimal_places -4 on scale 3 values: 15000 -> 20000 and 25000 -> 30000.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp2)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{14, 15, 16, 24, 25, 26}, numeric::scale_type{3}};
  auto const want = column_t{{1, 2, 2, 2, 3, 3}, numeric::scale_type{4}};

  auto const rounded = cudf::round(col, -4, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_UP with decimal_places -1 on scale 2 values: the request is finer than the stored
// precision, so 100, 200, 300 are only re-expressed at scale 1 (reps 10, 20, 30).
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegUp3)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1, 2, 3}, numeric::scale_type{2}};
  auto const want = column_t{{10, 20, 30}, numeric::scale_type{1}};

  auto const rounded = cudf::round(col, -1, cudf::rounding_method::HALF_UP);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN with decimal_places -3 on scale 2 values: 1500 -> 2000 and 2500 -> 2000.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{14, 15, 16, 24, 25, 26}, numeric::scale_type{2}};
  auto const want = column_t{{1, 2, 2, 2, 2, 3}, numeric::scale_type{3}};

  auto const rounded = cudf::round(col, -3, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN with decimal_places -4 on scale 3 values: 15000 -> 20000 and 25000 -> 20000.
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven2)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{14, 15, 16, 24, 25, 26}, numeric::scale_type{3}};
  auto const want = column_t{{1, 2, 2, 2, 2, 3}, numeric::scale_type{4}};

  auto const rounded = cudf::round(col, -4, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

// HALF_EVEN with decimal_places -1 on scale 2 values: the request is finer than the stored
// precision, so 100, 200, 300 are only re-expressed at scale 1 (reps 10, 20, 30).
TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegEven3)
{
  using Rep      = cudf::device_storage_type_t<TypeParam>;
  using column_t = cudf::test::fixed_point_column_wrapper<Rep>;

  auto const col  = column_t{{1, 2, 3}, numeric::scale_type{2}};
  auto const want = column_t{{10, 20, 30}, numeric::scale_type{1}};

  auto const rounded = cudf::round(col, -1, cudf::rounding_method::HALF_EVEN);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(want, rounded->view());
}

TYPED_TEST(RoundTestsFloatingPointTypes, SimpleFloatingPointTestHalfUp0)
Expand Down