Skip to content

Commit

Permalink
Avoid overflow for fixed_point cudf::cast and performance optimiz…
Browse files Browse the repository at this point in the history
…ation (#9772)

This resolves #9000.

When using `cudf::cast` for a wider decimal type to a narrower decimal type, you can overflow. This PR modifies the code path for this specific use case so that the "rescale" happens for the type cast. A small perf improvement was added when you have identical scales to avoid rescaling.

CI depends on #9766

Authors:
  - Conor Hoekstra (https://github.com/codereport)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #9772
  • Loading branch information
codereport authored Dec 1, 2021
1 parent 836f800 commit 677e632
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 19 deletions.
49 changes: 30 additions & 19 deletions cpp/src/unary/cast_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -305,28 +305,39 @@ struct dispatch_unary_cast_to {
rmm::mr::device_memory_resource* mr)
{
using namespace numeric;

auto const size = input.size();
auto temporary =
std::make_unique<column>(cudf::data_type{type.id(), input.type().scale()},
size,
rmm::device_buffer{size * cudf::size_of(type), stream},
copy_bitmask(input, stream),
input.null_count());

using SourceDeviceT = device_storage_type_t<SourceT>;
using TargetDeviceT = device_storage_type_t<TargetT>;

mutable_column_view output_mutable = *temporary;

thrust::transform(rmm::exec_policy(stream),
input.begin<SourceDeviceT>(),
input.end<SourceDeviceT>(),
output_mutable.begin<TargetDeviceT>(),
device_cast<SourceDeviceT, TargetDeviceT>{});

// clearly there is a more efficient way to do this, can optimize in the future
return rescale<TargetT>(*temporary, numeric::scale_type{type.scale()}, stream, mr);
auto casted = [&]() {
auto const size = input.size();
auto output = std::make_unique<column>(cudf::data_type{type.id(), input.type().scale()},
size,
rmm::device_buffer{size * cudf::size_of(type), stream},
copy_bitmask(input, stream),
input.null_count());

mutable_column_view output_mutable = *output;

thrust::transform(rmm::exec_policy(stream),
input.begin<SourceDeviceT>(),
input.end<SourceDeviceT>(),
output_mutable.begin<TargetDeviceT>(),
device_cast<SourceDeviceT, TargetDeviceT>{});

return output;
};

if (input.type().scale() == type.scale()) return casted();

if constexpr (sizeof(SourceDeviceT) < sizeof(TargetDeviceT)) {
// device_cast BEFORE rescale when SourceDeviceT is < TargetDeviceT
auto temporary = casted();
return detail::rescale<TargetT>(*temporary, scale_type{type.scale()}, stream, mr);
} else {
// device_cast AFTER rescale when SourceDeviceT is > TargetDeviceT to avoid overflow
auto temporary = detail::rescale<SourceT>(input, scale_type{type.scale()}, stream, mr);
return detail::cast(*temporary, type, stream, mr);
}
}

template <typename TargetT,
Expand Down
13 changes: 13 additions & 0 deletions cpp/tests/unary/cast_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1003,3 +1003,16 @@ TYPED_TEST(FixedPointTests, Decimal128ToDecimalXXWithLargerScaleAndNullMask)

CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
}

TEST_F(FixedPointTestSingleType, Int32ToInt64Convert)
{
using namespace numeric;
using fp_wrapperA = cudf::test::fixed_point_column_wrapper<int32_t>;
using fp_wrapperB = cudf::test::fixed_point_column_wrapper<int64_t>;

auto const input = fp_wrapperB{{141230900000L}, scale_type{-10}};
auto const expected = fp_wrapperA{{14123}, scale_type{-3}};
auto const result = cudf::cast(input, make_fixed_point_data_type<decimal32>(-3));

CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
}

0 comments on commit 677e632

Please sign in to comment.