From 9db65f81fba8d3614580627fe6754d4bd35c4fad Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Fri, 12 Mar 2021 18:44:50 -0500 Subject: [PATCH] `fixed_point` + `cudf::binary_operation` API Changes (#7435) This resolves https://github.com/rapidsai/cudf/issues/7442 Recently while working with @razajafri on `fixed_point` binary ops, it became clear that the `cudf::binary_operation` is breaking the "easy to use, **hard to misuse**" # 1 design guideline. I knew about this but I slotted it as technical debt to be cleaned up later. Long story short, after discussions with both @razajafri, @jrhemstad and comments on the https://github.com/rapidsai/cudf/issues/7442, we will implement the following: * [x] For `fixed_point` + `cudf::binary_operation` + `DIV` always **use** the `cudf::data_type output_type` parameter * [x] ~~For `fixed_point` + `cudf::binary_operation` + `TRUE_DIV`, require that the columns/scalars provided as arguments (`lhs` and `rhs`) will result in the specified `data_type`/`scale`~~ * [x] Provide a convenience function (something like `binary_operation_fixed_point_scale()`) that will compute the "expected" scale given two input columns/scalars and a `binary_operator` * [x] Remove `TRUE_DIV` * [x] Add unit tests for different output data_types * [x] Update Python/Cython **This will be a breaking change for all `fixed_point` + `cudf::binary_operation`.** Authors: - Conor Hoekstra (@codereport) Approvers: - Keith Kraus (@kkraus14) - Mike Wilson (@hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/7435 --- cpp/include/cudf/binaryop.hpp | 24 ++ cpp/src/binaryop/binaryop.cpp | 260 +++++---------- cpp/src/unary/cast_ops.cu | 6 +- cpp/tests/binaryop/binop-integration-test.cpp | 312 ++++++++++++++---- cpp/tests/fixed_point/fixed_point_tests.cu | 18 +- python/cudf/cudf/_lib/binaryop.pyx | 11 +- python/cudf/cudf/_lib/column.pyx | 26 +- python/cudf/cudf/_lib/types.pxd | 4 +- python/cudf/cudf/_lib/types.pyx | 21 +- python/cudf/cudf/core/column/decimal.py | 18 +- python/cudf/cudf/core/dtypes.py | 6 +- python/cudf/cudf/utils/dtypes.py | 7 +- 12 files changed, 420 insertions(+), 293 deletions(-) diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 72abefef04f..7099c29b9df 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -178,5 +178,29 @@ std::unique_ptr binary_operation( data_type output_type, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Computes the `scale` for a `fixed_point` number based on given binary operator `op` + * + * @param op The binary_operator used for two `fixed_point` numbers + * @param left_scale Scale of left `fixed_point` number + * @param right_scale Scale of right `fixed_point` number + * @return The resulting `scale` of the computed `fixed_point` number + */ +int32_t binary_operation_fixed_point_scale(binary_operator op, + int32_t left_scale, + int32_t right_scale); + +/** + * @brief Computes the `data_type` for a `fixed_point` number based on given binary operator `op` + * + * @param op The binary_operator used for two `fixed_point` numbers + * @param lhs `cudf::data_type` of left `fixed_point` number + * @param rhs `cudf::data_type` of right `fixed_point` number + * @return The resulting `cudf::data_type` of the computed `fixed_point` number + */ +cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, + cudf::data_type const& lhs, + cudf::data_type const& rhs); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index fc697267ca7..6b5afa69300 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -378,25 +379,7 @@ bool is_comparison_binop(binary_operator op) */ bool is_supported_fixed_point_binop(binary_operator op) { - return is_basic_arithmetic_binop(op) or is_comparison_binop(op) or - op == binary_operator::TRUE_DIV; -} - -/** - * @brief Computes the scale for a `fixed_point` number based on given binary operator `op` - * - * @param op The binary_operator used for two `fixed_point` numbers - * @param left_scale Scale of left `fixed_point` number - * @param right_scale Scale of right `fixed_point` number - * @return int32_t The resulting `scale` of the computed `fixed_point` number - */ -int32_t compute_scale_for_binop(binary_operator op, int32_t left_scale, int32_t right_scale) -{ - CUDF_EXPECTS(is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation."); - if (op == binary_operator::TRUE_DIV) CUDF_FAIL("TRUE_DIV scale cannot be computed."); - if (op == binary_operator::MUL) return left_scale + right_scale; - if (op == binary_operator::DIV) return left_scale - right_scale; - return std::min(left_scale, right_scale); + return is_basic_arithmetic_binop(op) or is_comparison_binop(op); } /** @@ -411,6 +394,26 @@ bool is_same_scale_necessary(binary_operator op) return op != binary_operator::MUL && op != binary_operator::DIV; } +template +void fixed_point_binary_operation_validation(binary_operator op, + Lhs lhs, + Rhs rhs, + thrust::optional output_type = {}) +{ + CUDF_EXPECTS(is_fixed_point(lhs), "Input must have fixed_point data_type."); + CUDF_EXPECTS(is_fixed_point(rhs), "Input must have fixed_point data_type."); + CUDF_EXPECTS(is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation"); + CUDF_EXPECTS(lhs.id() == rhs.id(), "Data type mismatch"); + if (output_type.has_value()) { + if (is_comparison_binop(op)) + CUDF_EXPECTS(output_type == cudf::data_type{type_id::BOOL8}, + "Comparison operations require boolean output type."); + else + CUDF_EXPECTS(is_fixed_point(output_type.value()), + "fixed_point binary operations require fixed_point output type."); + } +} + /** * @brief Function to compute binary operation of one `column_view` and one `scalar` * @@ -424,52 +427,24 @@ bool is_same_scale_necessary(binary_operator op) std::unique_ptr fixed_point_binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, - thrust::optional output_type, + cudf::data_type output_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { using namespace numeric; - CUDF_EXPECTS(is_fixed_point(lhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_fixed_point(rhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation"); - CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Data type mismatch"); - if (op == binary_operator::TRUE_DIV) - CUDF_EXPECTS(output_type.has_value(), "TRUE_DIV requires result_type."); - - auto const scale = op == binary_operator::TRUE_DIV - ? output_type.value().scale() - : compute_scale_for_binop(op, lhs.type().scale(), rhs.type().scale()); - - auto const out_type = output_type.value_or( - is_comparison_binop(op) ? data_type{type_id::BOOL8} : data_type{lhs.type().id(), scale}); + fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - auto out = make_fixed_width_column_for_output(lhs, rhs, op, out_type, stream, mr); - - if (rhs.is_empty()) return out; + if (rhs.is_empty()) + return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); + auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); + auto const type = + is_comparison_binop(op) ? data_type{type_id::BOOL8} : cudf::data_type{rhs.type().id(), scale}; + auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); auto out_view = out->mutable_view(); - if (op == binary_operator::TRUE_DIV) { - // Adjust scalar so lhs has the scale needed to get desired output data_type (scale) - auto const diff = (lhs.type().scale() - rhs.type().scale()) - scale; - auto const unused_scale = scale_type{0}; // scale of out_view already set - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const val = static_cast const&>(lhs).value(); - auto const scaled_value = diff < 0 ? val / factor : val * factor; - auto const scalar = make_fixed_point_scalar(scaled_value, unused_scale); - binops::jit::binary_operation(out_view, *scalar, rhs, binary_operator::DIV, stream); - return out; - } else { - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const val = static_cast const&>(lhs).value(); - auto const scaled_value = diff < 0 ? val / factor : val * factor; - auto const scalar = make_fixed_point_scalar(scaled_value, unused_scale); - binops::jit::binary_operation(out_view, *scalar, rhs, binary_operator::DIV, stream); - return out; - } - } else if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { + if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { // Adjust scalar/column so they have they same scale if (rhs.type().scale() < lhs.type().scale()) { auto const diff = lhs.type().scale() - rhs.type().scale(); @@ -479,7 +454,6 @@ std::unique_ptr fixed_point_binary_operation(scalar const& lhs, auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - return out; } else { CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); @@ -487,31 +461,27 @@ std::unique_ptr fixed_point_binary_operation(scalar const& lhs, auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - return out; } } else { auto const diff = rhs.type().scale() - lhs.type().scale(); auto const result = [&] { if (lhs.type().id() == type_id::DECIMAL32) { auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{rhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } else { CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{rhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); - return out; } } else { binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; } + return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); } /** @@ -527,52 +497,24 @@ std::unique_ptr fixed_point_binary_operation(scalar const& lhs, std::unique_ptr fixed_point_binary_operation(column_view const& lhs, scalar const& rhs, binary_operator op, - thrust::optional output_type, + cudf::data_type output_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { using namespace numeric; - CUDF_EXPECTS(is_fixed_point(lhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_fixed_point(rhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation"); - CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Data type mismatch"); - if (op == binary_operator::TRUE_DIV) - CUDF_EXPECTS(output_type.has_value(), "TRUE_DIV requires result_type."); - - auto const scale = op == binary_operator::TRUE_DIV - ? output_type.value().scale() - : compute_scale_for_binop(op, lhs.type().scale(), rhs.type().scale()); + fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - auto const out_type = output_type.value_or( - is_comparison_binop(op) ? data_type{type_id::BOOL8} : data_type{lhs.type().id(), scale}); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, out_type, stream, mr); - - if (lhs.is_empty()) return out; + if (lhs.is_empty()) + return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); + auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); + auto const type = + is_comparison_binop(op) ? data_type{type_id::BOOL8} : cudf::data_type{lhs.type().id(), scale}; + auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); auto out_view = out->mutable_view(); - if (op == binary_operator::TRUE_DIV) { - // Adjust columns so lhs has the scale needed to get desired output data_type (scale) - auto const diff = (lhs.type().scale() - rhs.type().scale()) - scale; - auto const interim_op = diff < 0 ? binary_operator::DIV : binary_operator::MUL; - auto const scalar_scale = scale_type{rhs.type().scale() + scale}; - auto const result = [&] { - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const scalar = make_fixed_point_scalar(factor, scalar_scale); - return binary_operation(lhs, *scalar, interim_op, {}, stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const scalar = make_fixed_point_scalar(factor, scalar_scale); - return binary_operation(lhs, *scalar, interim_op, {}, stream, mr); - } - }(); - binops::jit::binary_operation(out_view, result->view(), rhs, binary_operator::DIV, stream); - return out; - } else if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { + if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { // Adjust scalar/column so they have they same scale if (rhs.type().scale() > lhs.type().scale()) { auto const diff = rhs.type().scale() - lhs.type().scale(); @@ -582,7 +524,6 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const scale = scale_type{lhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - return out; } else { CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); @@ -590,31 +531,27 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - return out; } } else { auto const diff = lhs.type().scale() - rhs.type().scale(); auto const result = [&] { if (rhs.type().id() == type_id::DECIMAL32) { auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{lhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } else { CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{lhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); - return out; } } else { binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; } + return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); } /** @@ -630,94 +567,59 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, std::unique_ptr fixed_point_binary_operation(column_view const& lhs, column_view const& rhs, binary_operator op, - thrust::optional output_type, + cudf::data_type output_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { using namespace numeric; - CUDF_EXPECTS(is_fixed_point(lhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_fixed_point(rhs.type()), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation"); - CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Data type mismatch"); - if (op == binary_operator::TRUE_DIV) - CUDF_EXPECTS(output_type.has_value(), "TRUE_DIV requires result_type."); - - auto const scale = op == binary_operator::TRUE_DIV - ? output_type.value().scale() - : compute_scale_for_binop(op, lhs.type().scale(), rhs.type().scale()); - - auto const out_type = output_type.value_or( - is_comparison_binop(op) ? data_type{type_id::BOOL8} : data_type{lhs.type().id(), scale}); + fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - auto out = make_fixed_width_column_for_output(lhs, rhs, op, out_type, stream, mr); - - if (lhs.is_empty() or rhs.is_empty()) return out; + if (lhs.is_empty() or rhs.is_empty()) + return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); + auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); + auto const type = + is_comparison_binop(op) ? data_type{type_id::BOOL8} : cudf::data_type{lhs.type().id(), scale}; + auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); auto out_view = out->mutable_view(); - if (op == binary_operator::TRUE_DIV) { - // Adjust columns so lhs has the scale needed to get desired output data_type (scale) - auto const diff = (lhs.type().scale() - rhs.type().scale()) - scale; - auto const interim_op = diff < 0 ? binary_operator::DIV : binary_operator::MUL; - auto const scalar_scale = scale_type{rhs.type().scale() + scale}; - auto const result = [&] { - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const scalar = make_fixed_point_scalar(factor, scalar_scale); - return binary_operation(lhs, *scalar, interim_op, {}, stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow(std::abs(diff)); - auto const scalar = make_fixed_point_scalar(factor, scalar_scale); - return binary_operation(lhs, *scalar, interim_op, {}, stream, mr); - } - }(); - binops::jit::binary_operation(out_view, result->view(), rhs, binary_operator::DIV, stream); - return out; - } else if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { - // Adjust columns so they have they same scale TODO modify comment + if (lhs.type().scale() != rhs.type().scale() && is_same_scale_necessary(op)) { if (rhs.type().scale() < lhs.type().scale()) { auto const diff = lhs.type().scale() - rhs.type().scale(); auto const result = [&] { if (lhs.type().id() == type_id::DECIMAL32) { auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{lhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } else { CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{lhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); - return out; } else { auto const diff = rhs.type().scale() - lhs.type().scale(); auto const result = [&] { if (lhs.type().id() == type_id::DECIMAL32) { auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{rhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } else { CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); auto const factor = numeric::detail::ipow(diff); - auto const scalar = - make_fixed_point_scalar(factor, scale_type{rhs.type().scale()}); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); - return out; } } else { binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; } + return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); } std::unique_ptr binary_operation(scalar const& lhs, @@ -730,11 +632,8 @@ std::unique_ptr binary_operation(scalar const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) { - auto const type = - op == binary_operator::TRUE_DIV ? output_type : thrust::optional{thrust::nullopt}; - return fixed_point_binary_operation(lhs, rhs, op, type, stream, mr); - } + if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) + return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); @@ -760,11 +659,8 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) { - auto const type = - op == binary_operator::TRUE_DIV ? output_type : thrust::optional{thrust::nullopt}; - return fixed_point_binary_operation(lhs, rhs, op, type, stream, mr); - } + if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) + return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); @@ -792,11 +688,8 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) { - auto const type = - op == binary_operator::TRUE_DIV ? output_type : thrust::optional{thrust::nullopt}; - return fixed_point_binary_operation(lhs, rhs, op, type, stream, mr); - } + if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) + return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); @@ -845,6 +738,27 @@ std::unique_ptr binary_operation(column_view const& lhs, } // namespace detail +int32_t binary_operation_fixed_point_scale(binary_operator op, + int32_t left_scale, + int32_t right_scale) +{ + CUDF_EXPECTS(cudf::detail::is_supported_fixed_point_binop(op), + "Unsupported fixed_point binary operation."); + if (op == binary_operator::MUL) return left_scale + right_scale; + if (op == binary_operator::DIV) return left_scale - right_scale; + return std::min(left_scale, right_scale); +} + +cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, + cudf::data_type const& lhs, + cudf::data_type const& rhs) +{ + cudf::detail::fixed_point_binary_operation_validation(op, lhs, rhs); + + auto const scale = binary_operation_fixed_point_scale(op, lhs.scale(), rhs.scale()); + return cudf::data_type{lhs.id(), scale}; +} + std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 15db8e6a3dd..7e3a4050b4f 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -178,11 +178,13 @@ std::unique_ptr rescale(column_view input, if (input.type().scale() > scale) { auto const scalar = make_fixed_point_scalar(0, scale_type{scale}); - return detail::binary_operation(input, *scalar, binary_operator::ADD, {}, stream, mr); + auto const type = cudf::data_type{cudf::type_to_id(), scale}; + return detail::binary_operation(input, *scalar, binary_operator::ADD, type, stream, mr); } else { auto const diff = input.type().scale() - scale; auto const scalar = make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}); - return detail::binary_operation(input, *scalar, binary_operator::DIV, {}, stream, mr); + auto const type = cudf::data_type{cudf::type_to_id(), scale}; + return detail::binary_operation(input, *scalar, binary_operator::DIV, type, stream, mr); } }; diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 2d17853a72b..019e72d3d3f 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include +#include "cudf/utilities/error.hpp" namespace cudf { namespace test { @@ -2043,7 +2045,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd) auto const rhs = wrapper(vec2.begin(), vec2.end()); auto const expected_col = wrapper(expected.begin(), expected.end()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); } @@ -2072,7 +2078,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply) auto const rhs = wrapper(vec2.begin(), vec2.end()); auto const expected_col = wrapper(expected.begin(), expected.end()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); } @@ -2090,7 +2100,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply2) auto const rhs = fp_wrapper{{10, 10, 10, 10, 10}, scale_type{0}}; auto const expected = fp_wrapper{{110, 220, 330, 440, 550}, scale_type{-1}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2105,7 +2119,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv) auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{-1}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2120,7 +2138,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv2) auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{-2}}; auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{1}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2135,7 +2157,9 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv3) auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); auto const expected = fp_wrapper{{0, 2, 4, 5}, scale_type{0}}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, {}); + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2153,7 +2177,9 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv4) auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); auto const expected = fp_wrapper(result_begin, result_begin + 1000, scale_type{0}); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, {}); + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2168,7 +2194,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd2) auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; auto const expected = fp_wrapper{{210, 420, 630, 840, 1050}, scale_type{-2}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2183,7 +2213,78 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd3) auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; auto const expected = fp_wrapper{{2100, 4200, 6300, 8400, 10500}, scale_type{-3}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd4) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = make_fixed_point_scalar(100, scale_type{-2}); + auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::ADD, static_cast(lhs).type(), rhs->type()); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd5) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = make_fixed_point_scalar(100, scale_type{-2}); + auto const rhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::ADD, lhs->type(), static_cast(rhs).type()); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd6) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{3, 4, 5, 6, 7, 8}, scale_type{0}}; + + auto const expected1 = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; + auto const expected2 = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; + auto const type1 = cudf::data_type{cudf::type_to_id(), 0}; + auto const type2 = cudf::data_type{cudf::type_to_id(), 1}; + auto const result1 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type1); + auto const result2 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type2); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointCast) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; + auto const expected = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; + auto const type = cudf::data_type{cudf::type_to_id(), 1}; + auto const result = cudf::cast(col, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2198,7 +2299,9 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiplyScalar) auto const rhs = make_fixed_point_scalar(100, scale_type{-1}); auto const expected = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-2}}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, {}); + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::MUL, static_cast(lhs).type(), rhs->type()); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2213,7 +2316,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpSimplePlus) auto const rhs = fp_wrapper{{2250, 1005}, scale_type{-3}}; auto const expected = fp_wrapper{{3750, 3005}, scale_type{-3}}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2229,7 +2336,56 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimple) auto const col2 = fp_wrapper{{100, 200, 300, 400}, scale_type{-2}}; auto const expected = wrapper(trues.begin(), trues.end()); - auto const result = cudf::binary_operation(col1, col2, binary_operator::EQUAL, {}); + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; + auto const expected = wrapper(trues.begin(), trues.end()); + + auto const result = + cudf::binary_operation(col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0Null) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{0}}; + auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; + auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; + + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale2Null) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{-2}}; + auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; + auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; + + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2251,7 +2407,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualLessGreater) auto const iota_3 = fp_wrapper(vec1.begin(), vec1.end(), scale_type{-3}); auto const zeros_3 = fp_wrapper(vec2.begin(), vec2.end(), scale_type{-1}); - auto const iota_3_after_add = cudf::binary_operation(zeros_3, iota_3, binary_operator::ADD, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(iota_3).type(), + static_cast(zeros_3).type()); + auto const iota_3_after_add = cudf::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view()); @@ -2260,16 +2420,17 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualLessGreater) auto const trues = std::vector(sz, true); auto const true_col = wrapper(trues.begin(), trues.end()); + auto const btype = cudf::data_type{type_id::BOOL8}; auto const equal_result = - cudf::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, {}); + cudf::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view()); auto const less_result = - cudf::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, {}); + cudf::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view()); auto const greater_result = - cudf::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, {}); + cudf::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); } @@ -2284,7 +2445,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMaxSimple) auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 1, 1, 0, 0}, scale_type{-2}}; auto const expected = fp_wrapper{{40, 20, 30, 10, 0}, {1, 1, 1, 1, 0}, scale_type{-2}}; - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MAX, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX, + static_cast(col1).type(), + static_cast(col2).type()); + auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MAX, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2300,7 +2465,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMinSimple) auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 0, 1, 1, 0}, scale_type{-1}}; auto const expected = fp_wrapper{{10, 30, 20, 40, 0}, {1, 1, 1, 1, 0}, scale_type{-1}}; - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MIN, {}); + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN, + static_cast(col1).type(), + static_cast(col2).type()); + auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MIN, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2316,60 +2485,61 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullEqualsSimple) auto const col2 = fp_wrapper{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}}; auto const expected = wrapper{{1, 0, 0, 1}, {1, 1, 1, 1}}; - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_EQUALS, {}); + auto const result = cudf::binary_operation( + col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + auto const lhs = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; auto const expected = fp_wrapper{{25, 75, 125, 175}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv2) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div2) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; + auto const lhs = fp_wrapper{{100000, 300000, 500000, 700000}, scale_type{-3}}; auto const rhs = fp_wrapper{{20, 20, 20, 20}, scale_type{-1}}; auto const expected = fp_wrapper{{5000, 15000, 25000, 35000}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv3) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div3) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; - auto const rhs = fp_wrapper{{300, 900, 300, 300}, scale_type{-2}}; + auto const lhs = fp_wrapper{{10000, 30000, 50000, 70000}, scale_type{-2}}; + auto const rhs = fp_wrapper{{3, 9, 3, 3}, scale_type{0}}; auto const expected = fp_wrapper{{3333, 3333, 16666, 23333}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv4) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div4) { using namespace numeric; using decimalXX = TypeParam; @@ -2377,131 +2547,127 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv4) auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; auto const rhs = make_fixed_point_scalar(3, scale_type{0}); - auto const expected = fp_wrapper{{3333, 10000, 16666, 23333}, scale_type{-2}}; + auto const expected = fp_wrapper{{3, 10, 16, 23}, scale_type{1}}; - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::TRUE_DIV, type); + auto const type = data_type{type_to_id(), 1}; + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv5) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div6) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; - auto const rhs = make_fixed_point_scalar(30000, scale_type{-4}); - auto const expected = fp_wrapper{{3333, 10000, 16666, 23333}, scale_type{-2}}; + auto const lhs = make_fixed_point_scalar(3000, scale_type{-3}); + auto const rhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + + auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv6) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div7) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - for (auto const i : {0, 1, 2, 3, 4, 5, 6, 7}) { - auto const val = 3 * numeric::detail::ipow(i); - auto const lhs = make_fixed_point_scalar(val, scale_type{-i}); - auto const rhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - - auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::TRUE_DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); - } -} - -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv7) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(12000, scale_type{-1}); + auto const lhs = make_fixed_point_scalar(1200, scale_type{0}); auto const rhs = fp_wrapper{{100, 200, 300, 500, 600, 800, 1200, 1300}, scale_type{-2}}; auto const expected = fp_wrapper{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}}; auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv8) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div8) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; auto const lhs = fp_wrapper{{4000, 6000, 80000}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(500, scale_type{-2}); + auto const rhs = make_fixed_point_scalar(5000, scale_type{-3}); auto const expected = fp_wrapper{{0, 1, 16}, scale_type{2}}; auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv9) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div9) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{100000, 200000, 300000}, scale_type{-2}}; + auto const lhs = fp_wrapper{{10, 20, 30}, scale_type{2}}; auto const rhs = make_fixed_point_scalar(7, scale_type{1}); auto const expected = fp_wrapper{{1, 2, 4}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv10) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div10) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{100000, 200000, 300000}, scale_type{-2}}; + auto const lhs = fp_wrapper{{100, 200, 300}, scale_type{1}}; auto const rhs = make_fixed_point_scalar(7, scale_type{0}); auto const expected = fp_wrapper{{14, 28, 42}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpTrueDiv11) +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div11) { using namespace numeric; using decimalXX = TypeParam; using RepType = device_storage_type_t; - auto const lhs = fp_wrapper{{1000000, 2000000, 3000000}, scale_type{-2}}; + auto const lhs = fp_wrapper{{1000, 2000, 3000}, scale_type{1}}; auto const rhs = fp_wrapper{{7, 7, 7}, scale_type{0}}; auto const expected = fp_wrapper{{142, 285, 428}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::TRUE_DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpThrows) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; + auto const non_bool_type = data_type{type_to_id(), -2}; + auto const float_type = data_type{type_id::FLOAT32}; + EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), + cudf::logic_error); + EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::MUL, float_type), + cudf::logic_error); +} + } // namespace binop } // namespace test } // namespace cudf diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index 5f969098b48..5f74e459bb1 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -596,8 +596,9 @@ TEST_F(FixedPointTest, PositiveScaleWithValuesOutsideUnderlyingType32) auto const expected1 = fp_wrapper{{150000000}, scale_type{6}}; auto const expected2 = fp_wrapper{{50000000}, scale_type{6}}; - auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, {}); - auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, {}); + auto const type = cudf::data_type{cudf::type_id::DECIMAL32, 6}; + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type); + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); @@ -618,8 +619,9 @@ TEST_F(FixedPointTest, PositiveScaleWithValuesOutsideUnderlyingType64) auto const expected1 = fp_wrapper{{150000000}, scale_type{100}}; auto const expected2 = fp_wrapper{{50000000}, scale_type{100}}; - auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, {}); - auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, {}); + auto const type = cudf::data_type{cudf::type_id::DECIMAL64, 100}; + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type); + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); @@ -630,6 +632,7 @@ TYPED_TEST(FixedPointTestBothReps, ExtremelyLargeNegativeScale) // This is testing fixed_point values with an extremely large negative scale. The fixed_point // implementation should be able to handle any scale representable by an int32_t + using decimalXX = fixed_point; using fp_wrapper = cudf::test::fixed_point_column_wrapper; auto const a = fp_wrapper{{10}, scale_type{-201}}; @@ -639,8 +642,11 @@ TYPED_TEST(FixedPointTestBothReps, ExtremelyLargeNegativeScale) auto const expected1 = fp_wrapper{{150}, scale_type{-202}}; auto const expected2 = fp_wrapper{{5}, scale_type{-201}}; - auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, {}); - auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, {}); + auto const type1 = cudf::data_type{cudf::type_to_id(), -202}; + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type1); + + auto const type2 = cudf::data_type{cudf::type_to_id(), -201}; + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx index 5228c34af67..59a6b876961 100644 --- a/python/cudf/cudf/_lib/binaryop.pyx +++ b/python/cudf/cudf/_lib/binaryop.pyx @@ -13,7 +13,7 @@ from cudf._lib.replace import replace_nulls from cudf._lib.scalar import as_device_scalar from cudf._lib.scalar cimport DeviceScalar from cudf._lib.types import np_to_cudf_types -from cudf._lib.types cimport underlying_type_t_type_id +from cudf._lib.types cimport underlying_type_t_type_id, dtype_to_data_type from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.scalar.scalar cimport scalar @@ -174,15 +174,8 @@ def binaryop(lhs, rhs, op, dtype): cdef binary_operator c_op = ( op ) - cdef type_id tid = ( - ( - ( - np_to_cudf_types[np.dtype(dtype)] - ) - ) - ) - cdef data_type c_dtype = data_type(tid) + cdef data_type c_dtype = dtype_to_data_type(dtype) if is_scalar(lhs) or lhs is None: is_string_col = is_string_dtype(rhs.dtype) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index f9b5c859ff2..ffc3fdfd70a 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -32,7 +32,8 @@ from rmm._lib.device_buffer cimport DeviceBuffer from cudf._lib.types import np_to_cudf_types, cudf_to_np_types from cudf._lib.types cimport ( underlying_type_t_type_id, - dtype_from_column_view + dtype_from_column_view, + dtype_to_data_type ) from cudf._lib.null_mask import bitmask_allocation_size_bytes @@ -378,29 +379,12 @@ cdef class Column: cdef column_view _view(self, libcudf_types.size_type null_count) except *: if is_categorical_dtype(self.dtype): col = self.base_children[0] + data_dtype = col.dtype else: col = self + data_dtype = self.dtype - data_dtype = col.dtype - cdef libcudf_types.type_id tid - - if is_list_dtype(self.dtype): - tid = libcudf_types.type_id.LIST - elif is_struct_dtype(self.dtype): - tid = libcudf_types.type_id.STRUCT - elif is_decimal_dtype(self.dtype): - tid = libcudf_types.type_id.DECIMAL64 - else: - tid = ( - ( - np_to_cudf_types[np.dtype(data_dtype)] - ) - ) - cdef libcudf_types.data_type dtype = ( - libcudf_types.data_type(tid, -self.dtype.scale) - if tid == libcudf_types.type_id.DECIMAL64 - else libcudf_types.data_type(tid) - ) + cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[column_view] children cdef void* data diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd index 9b35ca2e80c..383b3665bd9 100644 --- a/python/cudf/cudf/_lib/types.pxd +++ b/python/cudf/cudf/_lib/types.pxd @@ -4,7 +4,7 @@ from libc.stdint cimport int32_t from libcpp cimport bool from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view - +cimport cudf._lib.cpp.types as libcudf_types ctypedef bool underlying_type_t_order ctypedef bool underlying_type_t_null_order @@ -14,3 +14,5 @@ ctypedef int32_t underlying_type_t_type_id ctypedef bool underlying_type_t_null_policy cdef dtype_from_column_view(column_view cv) + +cdef libcudf_types.data_type dtype_to_data_type(dtype) except * diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 370d083d7ac..e9ed4f21ddd 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -15,6 +15,7 @@ from cudf._lib.types cimport ( from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype +from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype cimport cudf._lib.cpp.types as libcudf_types @@ -192,8 +193,7 @@ cdef dtype_from_structs_column_view(column_view cv): cdef dtype_from_decimal_column_view(column_view cv): scale = -cv.type().scale() - precision = 18 # max of 64 bit integer - return Decimal64Dtype(precision=precision, scale=scale) + return Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=scale) cdef dtype_from_column_view(column_view cv): cdef libcudf_types.type_id tid = cv.type().id() @@ -208,3 +208,20 @@ cdef dtype_from_column_view(column_view cv): "Use decimal64 instead") else: return cudf_to_np_types[(tid)] + +cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: + if is_list_dtype(dtype): + tid = libcudf_types.type_id.LIST + elif is_struct_dtype(dtype): + tid = libcudf_types.type_id.STRUCT + elif is_decimal_dtype(dtype): + tid = libcudf_types.type_id.DECIMAL64 + else: + tid = ( + ( + np_to_cudf_types[np.dtype(dtype)])) + + if tid == libcudf_types.type_id.DECIMAL64: + return libcudf_types.data_type(tid, -dtype.scale) + else: + return libcudf_types.data_type(tid) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 4766426892a..7fbe602f07a 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -64,7 +64,11 @@ def to_arrow(self): def binary_operator(self, op, other, reflect=False): if reflect: self, other = other, self - result = libcudf.binaryop.binaryop(self, other, op, "int32") + scale = _binop_scale(self.dtype, other.dtype, op) + output_type = Decimal64Dtype( + scale=scale, precision=Decimal64Dtype.MAX_PRECISION + ) # precision will be ignored, libcudf has no notion of precision + result = libcudf.binaryop.binaryop(self, other, op, output_type) result.dtype.precision = _binop_precision(self.dtype, other.dtype, op) return result @@ -99,6 +103,18 @@ def as_string_column( ) +def _binop_scale(l_dtype, r_dtype, op): + # This should at some point be hooked up to libcudf's + # binary_operation_fixed_point_scale + s1, s2 = l_dtype.scale, r_dtype.scale + if op in ("add", "sub"): + return max(s1, s2) + elif op == "mul": + return s1 + s2 + else: + raise NotImplementedError() + + def _binop_precision(l_dtype, r_dtype, op): """ Returns the result precision when performing the diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 8b7d54b6715..a18aad3872b 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -235,7 +235,7 @@ class Decimal64Dtype(ExtensionDtype): name = "decimal" _metadata = ("precision", "scale") - _MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max)) + MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max)) def __init__(self, precision, scale=0): """ @@ -303,10 +303,10 @@ def __hash__(self): @classmethod def _validate(cls, precision, scale=0): - if precision > Decimal64Dtype._MAX_PRECISION: + if precision > Decimal64Dtype.MAX_PRECISION: raise ValueError( f"Cannot construct a {cls.__name__}" - f" with precision > {cls._MAX_PRECISION}" + f" with precision > {cls.MAX_PRECISION}" ) if abs(scale) > precision: raise ValueError(f"scale={scale} exceeds precision={precision}") diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 20c86b2a4b7..1438421bb12 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype, CategoricalDtypeType import cudf -from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar +from cudf._lib.scalar import DeviceScalar from cudf.core._compat import PANDAS_GE_120 _NA_REP = "" @@ -331,7 +331,10 @@ def to_cudf_compatible_scalar(val, dtype=None): If `val` is None, returns None. """ - if _is_null_host_scalar(val) or isinstance(val, cudf.Scalar): + + if cudf._lib.scalar._is_null_host_scalar(val) or isinstance( + val, cudf.Scalar + ): return val if not is_scalar(val):