From c8a171c74ae215dfc12079750eb98f795126c0f3 Mon Sep 17 00:00:00 2001 From: choekstra Date: Tue, 20 Jul 2021 02:07:34 +0000 Subject: [PATCH 001/112] Initial changes --- cpp/include/cudf/fixed_point/fixed_point.hpp | 9 ++++++--- cpp/include/cudf/utilities/traits.hpp | 4 +++- cpp/include/cudf/utilities/type_dispatcher.hpp | 3 ++- cpp/tests/fixed_point/fixed_point_tests.cu | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index d7bc9e02eff..9c3e69c3ea8 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -48,7 +48,9 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 }; template constexpr inline auto is_supported_representation_type() { - return cuda::std::is_same::value || cuda::std::is_same::value; + return cuda::std::is_same::value || + cuda::std::is_same::value || + cuda::std::is_same::value; } template @@ -750,8 +752,9 @@ CUDA_HOST_DEVICE_CALLABLE bool operator>(fixed_point const& lhs, return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value; } -using decimal32 = fixed_point; -using decimal64 = fixed_point; +using decimal32 = fixed_point; +using decimal64 = fixed_point; +using decimal128 = fixed_point<__int128_t, Radix::BASE_10>; /** @} */ // end of group } // namespace numeric diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 2cdc455e05c..808fc45d07f 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -379,7 +379,9 @@ constexpr inline bool is_timestamp(data_type type) template constexpr inline bool is_fixed_point() { - return std::is_same::value || std::is_same::value; + return std::is_same::value || + std::is_same::value || + std::is_same::value; } struct is_fixed_point_impl { diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index bd9ea015a32..48259d3ee0d 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -99,7 +99,8 @@ using id_to_type = typename id_to_type_impl::type; template using device_storage_type_t = std::conditional_t::value, int32_t, - std::conditional_t::value, int64_t, T>>; + std::conditional_t::value, int64_t, + std::conditional_t::value, __int128_t, T>>>; // clang-format on /** diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index 7244b913a6a..2627ab6d48d 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -43,7 +43,7 @@ template struct FixedPointTestBothReps : public cudf::test::BaseFixture { }; -using RepresentationTypes = ::testing::Types; +using RepresentationTypes = ::testing::Types; TYPED_TEST_CASE(FixedPointTestBothReps, RepresentationTypes); From afe6ec6a3f3fdaa221a3fbfe902c3977d366a84b Mon Sep 17 00:00:00 2001 From: choekstra Date: Tue, 20 Jul 2021 06:38:45 +0000 Subject: [PATCH 002/112] More changes --- .../cudf/column/column_device_view.cuh | 18 ++++++++ .../cudf/detail/aggregation/aggregation.cuh | 42 ++++++++++++------- cpp/include/cudf/detail/copy_if.cuh | 5 +++ .../cudf/detail/utilities/device_atomics.cuh | 2 +- cpp/include/cudf/fixed_point/fixed_point.hpp | 29 +++++++------ cpp/include/cudf/types.hpp | 1 + cpp/include/cudf/utilities/traits.hpp | 4 +- .../cudf/utilities/type_dispatcher.hpp | 15 ++++++- cpp/src/io/json/json_gpu.cu | 8 ++++ cpp/src/jit/type.cpp | 1 + cpp/src/quantiles/quantile.cu | 4 +- cpp/src/round/round.cu | 3 +- cpp/src/scalar/scalar.cpp | 3 ++ .../strings/convert/convert_fixed_point.cu | 41 ++++++++++-------- cpp/src/unary/math_ops.cu | 5 ++- 15 files changed, 128 insertions(+), 53 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 02e3eee6b43..4639cb4f357 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -455,6 +455,24 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return decimal64{scaled_integer{data()[element_index], scale}}; } + /** + * @brief Returns a `numeric::decimal128` element at the specified index for a `fixed_point` + * column. + * + * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, + * then any attempt to use the result will lead to undefined behavior. + * + * @param element_index Position of the desired element + * @return numeric::decimal128 representing the element at this index + */ + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + using namespace numeric; + auto const scale = scale_type{_type.scale()}; + return decimal128{scaled_integer<__int128_t>{data<__int128_t>()[element_index], scale}}; + } + /** * @brief For a given `T`, indicates if `column_device_view::element()` has a valid overload. * diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 53c1f47c201..c64fba286d4 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -139,9 +139,11 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - using Target = target_type_t; - atomicMin(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + using Target = target_type_t; + atomicMin(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -164,8 +166,10 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - atomicMin(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + atomicMin(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -185,9 +189,11 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - using Target = target_type_t; - atomicMax(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + using Target = target_type_t; + atomicMax(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -210,8 +216,10 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - atomicMax(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + atomicMax(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -231,9 +239,11 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - using Target = target_type_t; - atomicAdd(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + using Target = target_type_t; + atomicAdd(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -256,8 +266,10 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - atomicAdd(&target.element(target_index), - static_cast(source.element(source_index))); + if constexpr (not std::is_same::value) { + atomicAdd(&target.element(target_index), + static_cast(source.element(source_index))); + } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 2051daec00b..372a2ece6de 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -223,6 +223,11 @@ struct DeviceType::value using type = typename cudf::device_storage_type_t; }; +template +struct DeviceType::value>> { + using type = typename cudf::device_storage_type_t; +}; + // Dispatch functor which performs the scatter for fixed column types and gather for other template struct scatter_gather_functor { diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 6380e76fdfa..8e340408449 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -426,7 +426,7 @@ struct typesAtomicCASImpl { * @returns The old value at `address` */ template -typename std::enable_if_t(), T> __forceinline__ __device__ +typename std::enable_if_t() && not std::is_same::value, T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { auto fun = cudf::detail::genericAtomicOperationImpl{}; diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 9c3e69c3ea8..ccfd4a7aab4 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -550,19 +550,24 @@ class fixed_point { */ explicit operator std::string() const { - if (_scale < 0) { - auto const av = std::abs(_value); - int64_t const n = std::pow(10, -_scale); - int64_t const f = av % n; - auto const num_zeros = - std::max(0, (-_scale - static_cast(std::to_string(f).size()))); - auto const zeros = std::string(num_zeros, '0'); - auto const sign = _value < 0 ? std::string("-") : std::string(); - return sign + std::to_string(av / n) + std::string(".") + zeros + std::to_string(av % n); - } else { - auto const zeros = std::string(_scale, '0'); - return std::to_string(_value) + zeros; + if constexpr (not std::is_same::value) { + if (_scale < 0) { + auto const av = std::abs(_value); + int64_t const n = std::pow(10, -_scale); + int64_t const f = av % n; + auto const num_zeros = + std::max(0, (-_scale - static_cast(std::to_string(f).size()))); + auto const zeros = std::string(num_zeros, '0'); + auto const sign = _value < 0 ? std::string("-") : std::string(); + return sign + std::to_string(av / n) + std::string(".") + zeros + std::to_string(av % n); + } else { + auto const zeros = std::string(_scale, '0'); + return std::to_string(_value) + zeros; + } } + + // std::abs and std::to_string don't work on __int128_t + return "TODO"; } }; diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index e1037efb5c8..f639c2dae6b 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -228,6 +228,7 @@ enum class type_id : int32_t { LIST, ///< List elements DECIMAL32, ///< Fixed-point type with int32_t DECIMAL64, ///< Fixed-point type with int64_t + DECIMAL128, ///< Fixed-point type with __int128_t STRUCT, ///< Struct elements // `NUM_TYPE_IDS` must be last! NUM_TYPE_IDS ///< Total number of type ids diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 808fc45d07f..8bae0d5d150 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -152,7 +152,7 @@ constexpr inline bool is_equality_comparable() template constexpr inline bool is_numeric() { - return std::is_integral::value or std::is_floating_point::value; + return std::is_integral::value or std::is_floating_point::value || std::is_same::value; } struct is_numeric_impl { @@ -489,7 +489,7 @@ constexpr inline bool is_chrono(data_type type) template constexpr bool is_rep_layout_compatible() { - return cudf::is_numeric() or cudf::is_chrono() or cudf::is_boolean(); + return cudf::is_numeric() or cudf::is_chrono() or cudf::is_boolean() || std::is_same::value; } /** diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 48259d3ee0d..1fe2692834a 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -114,6 +114,7 @@ inline type_id device_storage_type_id(type_id id) switch (id) { case type_id::DECIMAL32: return type_id::INT32; case type_id::DECIMAL64: return type_id::INT64; + // case type_id::DECIMAL128: return type_id::INT128; // TODO: avoid this (need for type_id::INT128) default: return id; } } @@ -131,7 +132,8 @@ template bool type_id_matches_device_storage_type(type_id id) { return (id == type_id::DECIMAL32 && std::is_same::value) || - (id == type_id::DECIMAL64 && std::is_same::value) || id == type_to_id(); + (id == type_id::DECIMAL64 && std::is_same::value) || + (id == type_id::DECIMAL128 && std::is_same::value) || id == type_to_id(); } /** @@ -189,6 +191,7 @@ CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32); CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST); CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32); CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64); +CUDF_TYPE_MAPPING(numeric::decimal128, type_id::DECIMAL128); CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT); /** @@ -222,6 +225,7 @@ MAP_NUMERIC_SCALAR(int8_t) MAP_NUMERIC_SCALAR(int16_t) MAP_NUMERIC_SCALAR(int32_t) MAP_NUMERIC_SCALAR(int64_t) +MAP_NUMERIC_SCALAR(__int128_t) MAP_NUMERIC_SCALAR(uint8_t) MAP_NUMERIC_SCALAR(uint16_t) MAP_NUMERIC_SCALAR(uint32_t) @@ -254,6 +258,12 @@ struct type_to_scalar_type_impl { using ScalarDeviceType = cudf::fixed_point_scalar_device_view; }; +template <> +struct type_to_scalar_type_impl { + using ScalarType = cudf::fixed_point_scalar; + using ScalarDeviceType = cudf::fixed_point_scalar_device_view; +}; + template <> // TODO: this is a temporary solution for make_pair_iterator struct type_to_scalar_type_impl { using ScalarType = cudf::numeric_scalar; @@ -495,6 +505,9 @@ CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) type_dispatcher(cudf::data_ty case type_id::DECIMAL64: return f.template operator()::type>( std::forward(args)...); + case type_id::DECIMAL128: + return f.template operator()::type>( + std::forward(args)...); case type_id::STRUCT: return f.template operator()::type>( std::forward(args)...); diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index ba6bc30e0d4..9e8922211b2 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -285,6 +285,14 @@ __inline__ __device__ numeric::decimal64 decode_value(const char*, return numeric::decimal64{}; } +template <> +__inline__ __device__ numeric::decimal128 decode_value(const char*, + const char*, + parse_options_view const&) +{ + return numeric::decimal128{}; +} + /** * @brief Functor for converting plain text data to cuDF data type value. */ diff --git a/cpp/src/jit/type.cpp b/cpp/src/jit/type.cpp index 16894168b31..cf91932ca19 100644 --- a/cpp/src/jit/type.cpp +++ b/cpp/src/jit/type.cpp @@ -76,6 +76,7 @@ std::string get_type_name(data_type type) case type_id::STRUCT: return CUDF_STRINGIFY(Struct); case type_id::DECIMAL32: return CUDF_STRINGIFY(int32_t); case type_id::DECIMAL64: return CUDF_STRINGIFY(int64_t); + case type_id::DECIMAL128: return CUDF_STRINGIFY(__int128_t); default: break; } diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 25bf4a436ad..831e309961a 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -47,7 +47,7 @@ struct quantile_functor { rmm::mr::device_memory_resource* mr; template - std::enable_if_t::value and not cudf::is_fixed_point(), + std::enable_if_t::value and not cudf::is_fixed_point() or std::is_same::value, // TODO std::unique_ptr> operator()(column_view const& input) { @@ -55,7 +55,7 @@ struct quantile_functor { } template - std::enable_if_t::value or cudf::is_fixed_point(), + std::enable_if_t::value or cudf::is_fixed_point() and not std::is_same::value, // TODO std::unique_ptr> operator()(column_view const& input) { diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index b8c48434f5c..88a1b93e088 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -74,7 +74,8 @@ int16_t __device__ generic_sign(T) template constexpr inline auto is_supported_round_type() { - return (cudf::is_numeric() && not std::is_same::value) || cudf::is_fixed_point(); + return (cudf::is_numeric() && not std::is_same::value) || cudf::is_fixed_point() + && not std::is_same::value; } template diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 045bfbe0327..606cdab8368 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -209,6 +209,7 @@ typename fixed_point_scalar::rep_type const* fixed_point_scalar::data() co */ template class fixed_point_scalar; template class fixed_point_scalar; +template class fixed_point_scalar; namespace detail { @@ -282,6 +283,7 @@ template class fixed_width_scalar; template class fixed_width_scalar; template class fixed_width_scalar; template class fixed_width_scalar; +template class fixed_width_scalar<__int128_t>; template class fixed_width_scalar; template class fixed_width_scalar; template class fixed_width_scalar; @@ -340,6 +342,7 @@ template class numeric_scalar; template class numeric_scalar; template class numeric_scalar; template class numeric_scalar; +template class numeric_scalar<__int128_t>; template class numeric_scalar; template class numeric_scalar; template class numeric_scalar; diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 2f57b38249f..aee05419b67 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -206,15 +206,18 @@ struct decimal_to_string_size_fn { if (scale >= 0) return count_digits(value) + scale; - auto const abs_value = std::abs(value); - auto const exp_ten = static_cast(exp10(static_cast(-scale))); - auto const fraction = count_digits(abs_value % exp_ten); - auto const num_zeros = std::max(0, (-scale - fraction)); - return static_cast(value < 0) + // sign if negative - count_digits(abs_value / exp_ten) + // integer - 1 + // decimal point - num_zeros + // zeros padding - fraction; // size of fraction + if constexpr (not std::is_same::value) { + auto const abs_value = std::abs(value); + auto const exp_ten = static_cast(exp10(static_cast(-scale))); + auto const fraction = count_digits(abs_value % exp_ten); + auto const num_zeros = std::max(0, (-scale - fraction)); + return static_cast(value < 0) + // sign if negative + count_digits(abs_value / exp_ten) + // integer + 1 + // decimal point + num_zeros + // zeros padding + fraction; // size of fraction + } + return 0; } }; @@ -247,18 +250,20 @@ struct decimal_to_string_fn { // write format: [-]integer.fraction // where integer = abs(value) / (10^abs(scale)) // fraction = abs(value) % (10^abs(scale)) - auto const abs_value = std::abs(value); - if (value < 0) *d_buffer++ = '-'; // add sign - auto const exp_ten = static_cast(exp10(static_cast(-scale))); - auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); + if constexpr (not std::is_same::value) { // TODO fix + auto const abs_value = std::abs(value); + if (value < 0) *d_buffer++ = '-'; // add sign + auto const exp_ten = static_cast(exp10(static_cast(-scale))); + auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); - d_buffer += integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part - *d_buffer++ = '.'; // add decimal point + d_buffer += integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part + *d_buffer++ = '.'; // add decimal point - thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros - d_buffer += num_zeros; + thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros + d_buffer += num_zeros; - integer_to_string(abs_value % exp_ten, d_buffer); // add the fraction part + integer_to_string(abs_value % exp_ten, d_buffer); // add the fraction part + } } }; diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 0f8d9228310..3849b56d4eb 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -271,7 +271,10 @@ struct fixed_point_floor { template struct fixed_point_abs { T n; - __device__ T operator()(T data) { return std::abs(data); } + __device__ T operator()(T data) { + // std::abs does not work for __int128_t + return data > 0 ? data : data * -1; + } }; template typename FixedPointFunctor> From 43b615a6d0b4779343bce185d31203f147f3efe6 Mon Sep 17 00:00:00 2001 From: choekstra Date: Tue, 20 Jul 2021 06:52:58 +0000 Subject: [PATCH 003/112] Small cleanup --- cpp/include/cudf/utilities/traits.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 8bae0d5d150..d7a297d63e7 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -489,7 +489,7 @@ constexpr inline bool is_chrono(data_type type) template constexpr bool is_rep_layout_compatible() { - return cudf::is_numeric() or cudf::is_chrono() or cudf::is_boolean() || std::is_same::value; + return cudf::is_numeric() or cudf::is_chrono() or cudf::is_boolean(); } /** From ebedcadb97eb8633e615fde9d087797cec2cf3dd Mon Sep 17 00:00:00 2001 From: choekstra Date: Tue, 20 Jul 2021 07:45:48 +0000 Subject: [PATCH 004/112] Small cleanup --- cpp/include/cudf/detail/utilities/device_atomics.cuh | 2 +- cpp/include/cudf/utilities/traits.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 8e340408449..6380e76fdfa 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -426,7 +426,7 @@ struct typesAtomicCASImpl { * @returns The old value at `address` */ template -typename std::enable_if_t() && not std::is_same::value, T> __forceinline__ __device__ +typename std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { auto fun = cudf::detail::genericAtomicOperationImpl{}; diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index d7a297d63e7..b8255d4eb45 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -152,7 +152,7 @@ constexpr inline bool is_equality_comparable() template constexpr inline bool is_numeric() { - return std::is_integral::value or std::is_floating_point::value || std::is_same::value; + return std::is_integral::value or std::is_floating_point::value or std::is_same::value; } struct is_numeric_impl { From 1d2e0b4a3de4f02932325fd08fbf37868bce08fd Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 21 Jul 2021 00:02:21 +0000 Subject: [PATCH 005/112] Removal of device_storage_type_id, formatting and more --- cpp/include/cudf/detail/iterator.cuh | 3 +-- cpp/include/cudf/fixed_point/fixed_point.hpp | 5 ++--- cpp/include/cudf/utilities/traits.hpp | 6 +++--- .../cudf/utilities/type_dispatcher.hpp | 20 ++----------------- cpp/src/aggregation/aggregation.cu | 3 +-- 5 files changed, 9 insertions(+), 28 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index deb161fd9c2..3e789299716 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -102,9 +102,8 @@ struct null_replaced_value_accessor { bool has_nulls = true) : col{col}, null_replacement{null_val}, has_nulls{has_nulls} { - CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); - // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); } diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index ccfd4a7aab4..d195f976419 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -48,8 +48,7 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 }; template constexpr inline auto is_supported_representation_type() { - return cuda::std::is_same::value || - cuda::std::is_same::value || + return cuda::std::is_same::value || cuda::std::is_same::value || cuda::std::is_same::value; } @@ -552,7 +551,7 @@ class fixed_point { { if constexpr (not std::is_same::value) { if (_scale < 0) { - auto const av = std::abs(_value); + auto const av = std::abs(_value); int64_t const n = std::pow(10, -_scale); int64_t const f = av % n; auto const num_zeros = diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index b8255d4eb45..402e2461da7 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -152,7 +152,8 @@ constexpr inline bool is_equality_comparable() template constexpr inline bool is_numeric() { - return std::is_integral::value or std::is_floating_point::value or std::is_same::value; + return std::is_integral::value or std::is_floating_point::value or + std::is_same::value; } struct is_numeric_impl { @@ -379,8 +380,7 @@ constexpr inline bool is_timestamp(data_type type) template constexpr inline bool is_fixed_point() { - return std::is_same::value || - std::is_same::value || + return std::is_same::value || std::is_same::value || std::is_same::value; } diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 1fe2692834a..f6c07eb25ca 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -98,27 +98,11 @@ using id_to_type = typename id_to_type_impl::type; // clang-format off template using device_storage_type_t = - std::conditional_t::value, int32_t, - std::conditional_t::value, int64_t, + std::conditional_t::value, int32_t, + std::conditional_t::value, int64_t, std::conditional_t::value, __int128_t, T>>>; // clang-format on -/** - * @brief Returns the corresponding `type_id` of type stored on device for a given `type_id` - * - * @param id The given `type_id` - * @return Corresponding `type_id` of type stored on device - */ -inline type_id device_storage_type_id(type_id id) -{ - switch (id) { - case type_id::DECIMAL32: return type_id::INT32; - case type_id::DECIMAL64: return type_id::INT64; - // case type_id::DECIMAL128: return type_id::INT128; // TODO: avoid this (need for type_id::INT128) - default: return id; - } -} - /** * @brief Checks if `fixed_point`-like types have template type `T` matching the column's * stored type id diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu index b9193345c94..5b48282cb5b 100644 --- a/cpp/src/aggregation/aggregation.cu +++ b/cpp/src/aggregation/aggregation.cu @@ -28,8 +28,7 @@ void initialize_with_identity(mutable_table_view& table, // kernel per column for (size_type i = 0; i < table.num_columns(); ++i) { auto col = table.column(i); - auto const type = data_type{device_storage_type_id(col.type().id())}; - dispatch_type_and_aggregation(type, aggs[i], identity_initializer{}, col, stream); + dispatch_type_and_aggregation(col.type(), aggs[i], identity_initializer{}, col, stream); // TODO SFINAE for decimal } } From 2ea39fec28906be12be389b5bae29306efc3efa5 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 21 Jul 2021 01:22:55 +0000 Subject: [PATCH 006/112] Formatting --- .../cudf/column/column_device_view.cuh | 14 ++++---- cpp/src/aggregation/aggregation.cu | 5 +-- cpp/src/io/json/json_gpu.cu | 12 +++---- cpp/src/quantiles/quantile.cu | 7 ++-- cpp/src/round/round.cu | 4 +-- .../strings/convert/convert_fixed_point.cu | 34 +++++++++---------- cpp/src/unary/math_ops.cu | 5 +-- 7 files changed, 43 insertions(+), 38 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 4639cb4f357..c1babff9df3 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -465,13 +465,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @param element_index Position of the desired element * @return numeric::decimal128 representing the element at this index */ - template ::value)> - __device__ T element(size_type element_index) const noexcept - { - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal128{scaled_integer<__int128_t>{data<__int128_t>()[element_index], scale}}; - } + template ::value)> + __device__ T element(size_type element_index) const noexcept + { + using namespace numeric; + auto const scale = scale_type{_type.scale()}; + return decimal128{scaled_integer<__int128_t>{data<__int128_t>()[element_index], scale}}; + } /** * @brief For a given `T`, indicates if `column_device_view::element()` has a valid overload. diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu index 5b48282cb5b..b4d4b99b87a 100644 --- a/cpp/src/aggregation/aggregation.cu +++ b/cpp/src/aggregation/aggregation.cu @@ -27,8 +27,9 @@ void initialize_with_identity(mutable_table_view& table, // TODO: Initialize all the columns in a single kernel instead of invoking one // kernel per column for (size_type i = 0; i < table.num_columns(); ++i) { - auto col = table.column(i); - dispatch_type_and_aggregation(col.type(), aggs[i], identity_initializer{}, col, stream); // TODO SFINAE for decimal + auto col = table.column(i); + dispatch_type_and_aggregation( + col.type(), aggs[i], identity_initializer{}, col, stream); // TODO SFINAE for decimal } } diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 9e8922211b2..244081ed286 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -63,9 +63,9 @@ __device__ std::pair limit_range_to_brackets(char cons auto const data_begin = thrust::next(thrust::find_if( thrust::seq, begin, end, [] __device__(auto c) { return c == '[' || c == '{'; })); auto const data_end = thrust::next(thrust::find_if(thrust::seq, - thrust::make_reverse_iterator(end), - thrust::make_reverse_iterator(data_begin), - [](auto c) { return c == ']' || c == '}'; })) + thrust::make_reverse_iterator(end), + thrust::make_reverse_iterator(data_begin), + [](auto c) { return c == ']' || c == '}'; })) .base(); return {data_begin, data_end}; } @@ -287,8 +287,8 @@ __inline__ __device__ numeric::decimal64 decode_value(const char*, template <> __inline__ __device__ numeric::decimal128 decode_value(const char*, - const char*, - parse_options_view const&) + const char*, + parse_options_view const&) { return numeric::decimal128{}; } @@ -655,7 +655,7 @@ __global__ void detect_data_types_kernel( bool is_negative = (*desc.value_begin == '-'); char const* data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+')); cudf::size_type* ptr = cudf::io::gpu::infer_integral_field_counter( - data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]); + data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]); atomicAdd(ptr, 1); } else if (is_like_float( value_len, digit_count, decimal_count, dash_count + plus_count, exponent_count)) { diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 831e309961a..89ec7ee7a47 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -47,7 +47,8 @@ struct quantile_functor { rmm::mr::device_memory_resource* mr; template - std::enable_if_t::value and not cudf::is_fixed_point() or std::is_same::value, // TODO + std::enable_if_t::value and not cudf::is_fixed_point() or + std::is_same::value, // TODO std::unique_ptr> operator()(column_view const& input) { @@ -55,7 +56,9 @@ struct quantile_functor { } template - std::enable_if_t::value or cudf::is_fixed_point() and not std::is_same::value, // TODO + std::enable_if_t::value or + cudf::is_fixed_point() and + not std::is_same::value, // TODO std::unique_ptr> operator()(column_view const& input) { diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 88a1b93e088..16167a82024 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -74,8 +74,8 @@ int16_t __device__ generic_sign(T) template constexpr inline auto is_supported_round_type() { - return (cudf::is_numeric() && not std::is_same::value) || cudf::is_fixed_point() - && not std::is_same::value; + return (cudf::is_numeric() && not std::is_same::value) || + cudf::is_fixed_point() && not std::is_same::value; } template diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index aee05419b67..be8993cd7ef 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -136,11 +136,11 @@ struct dispatch_to_fixed_point_fn { // create output column auto results = make_fixed_point_column(output_type, - input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), - input.null_count(), - stream, - mr); + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); auto d_results = results->mutable_view().data(); // convert strings into decimal values @@ -206,17 +206,17 @@ struct decimal_to_string_size_fn { if (scale >= 0) return count_digits(value) + scale; - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same::value) { // TODO auto const abs_value = std::abs(value); auto const exp_ten = static_cast(exp10(static_cast(-scale))); auto const fraction = count_digits(abs_value % exp_ten); auto const num_zeros = std::max(0, (-scale - fraction)); return static_cast(value < 0) + // sign if negative - count_digits(abs_value / exp_ten) + // integer - 1 + // decimal point - num_zeros + // zeros padding - fraction; // size of fraction - } + count_digits(abs_value / exp_ten) + // integer + 1 + // decimal point + num_zeros + // zeros padding + fraction; // size of fraction + } return 0; } }; @@ -250,7 +250,7 @@ struct decimal_to_string_fn { // write format: [-]integer.fraction // where integer = abs(value) / (10^abs(scale)) // fraction = abs(value) % (10^abs(scale)) - if constexpr (not std::is_same::value) { // TODO fix + if constexpr (not std::is_same::value) { // TODO fix auto const abs_value = std::abs(value); if (value < 0) *d_buffer++ = '-'; // add sign auto const exp_ten = static_cast(exp10(static_cast(-scale))); @@ -352,11 +352,11 @@ struct dispatch_is_fixed_point_fn { // create output column auto results = make_numeric_column(data_type{type_id::BOOL8}, - input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), - input.null_count(), - stream, - mr); + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); auto d_results = results->mutable_view().data(); // check strings for valid fixed-point chars diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 3849b56d4eb..5c44ca3e445 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -271,9 +271,10 @@ struct fixed_point_floor { template struct fixed_point_abs { T n; - __device__ T operator()(T data) { + __device__ T operator()(T data) + { // std::abs does not work for __int128_t - return data > 0 ? data : data * -1; + return data > 0 ? data : data * -1; } }; From 606d6e3ec6b7091bf726ea9a45b81cbdbf774b1a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 21 Jul 2021 02:21:36 +0000 Subject: [PATCH 007/112] `cudf::round` support for `__int128_t` --- cpp/src/round/round.cu | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 16167a82024..715ee64b103 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -74,8 +74,7 @@ int16_t __device__ generic_sign(T) template constexpr inline auto is_supported_round_type() { - return (cudf::is_numeric() && not std::is_same::value) || - cudf::is_fixed_point() && not std::is_same::value; + return (cudf::is_numeric() && not std::is_same::value) || cudf::is_fixed_point(); } template @@ -87,7 +86,9 @@ struct half_up_zero { return generic_round(e); } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -106,7 +107,9 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -123,7 +126,9 @@ struct half_up_negative { return generic_round(e / n) * n; } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -140,7 +145,9 @@ struct half_even_zero { return generic_round_half_even(e); } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -159,7 +166,9 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -176,7 +185,9 @@ struct half_even_negative { return generic_round_half_even(e / n) * n; } - template ::value>* = nullptr> + template ::value or + std::is_same::value>* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case From ee70203bc37f9308a9dff45627393124ad98b4a7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 21 Jul 2021 07:13:04 +0000 Subject: [PATCH 008/112] Enable tests & fixes --- cpp/include/cudf/fixed_point/fixed_point.hpp | 7 +++++-- cpp/include/cudf_test/type_lists.hpp | 3 ++- cpp/tests/reductions/scan_tests.cpp | 7 ++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index d195f976419..abaecf00925 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -52,10 +52,12 @@ constexpr inline auto is_supported_representation_type() cuda::std::is_same::value; } +// TODO make a temporary::is_integral function template constexpr inline auto is_supported_construction_value_type() { - return cuda::std::is_integral::value || cuda::std::is_floating_point::value; + return cuda::std::is_integral::value || cuda::std::is_floating_point::value || + cuda::std::is_same::value; } // Helper functions for `fixed_point` type @@ -277,7 +279,8 @@ class fixed_point { * @return The `fixed_point` number in base 10 (aka human readable format) */ template ::value>* = nullptr> + typename cuda::std::enable_if_t::value or + std::is_same::value>* = nullptr> explicit constexpr operator U() const { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index 5c1b0c6c458..e3158d1937b 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -264,7 +264,8 @@ using ListTypes = cudf::test::Types; * TYPED_TEST_CASE(MyTypedFixture, cudf::test::FixedPointTypes); * ``` */ -using FixedPointTypes = cudf::test::Types; +using FixedPointTypes = + cudf::test::Types; /** * @brief Provides a list of all fixed-width element types for use in GTest diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index ef5a66a2019..23931a8d5af 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -63,6 +63,11 @@ struct TypeParam_to_host_type { using type = numeric::decimal64::rep; }; +template <> +struct TypeParam_to_host_type { + using type = numeric::decimal128::rep; +}; + template typename std::enable_if, thrust::host_vector>::type @@ -408,7 +413,7 @@ TEST_F(ScanStringsTest, MoreStringsMinMax) return std::string(s); }); auto validity = cudf::detail::make_counting_transform_iterator( - 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity); thrust::host_vector v(data_begin, data_begin + row_count); From fd6157b1abf4ec02ab3bf98b7f95ab521bd34a2f Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 23 Jul 2021 17:35:35 +0000 Subject: [PATCH 009/112] Missing changes --- cpp/include/cudf/types.hpp | 2 +- cpp/include/cudf_test/column_wrapper.hpp | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index f639c2dae6b..4cba88e08bb 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -264,7 +264,7 @@ class data_type { */ explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale} { - assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64); + assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128); } /** diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index a4857552831..341500d3c92 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -509,11 +509,12 @@ class fixed_point_column_wrapper : public detail::column_wrapper { { CUDF_EXPECTS(numeric::is_supported_representation_type(), "not valid representation type"); - auto const size = cudf::distance(begin, end); - auto const elements = thrust::host_vector(begin, end); - auto const is_decimal32 = std::is_same::value; - auto const id = is_decimal32 ? type_id::DECIMAL32 : type_id::DECIMAL64; - auto const data_type = cudf::data_type{id, static_cast(scale)}; + auto const size = cudf::distance(begin, end); + auto const elements = thrust::host_vector(begin, end); + auto const id = std::is_same::value ? type_id::DECIMAL32 + : std::is_same::value ? type_id::DECIMAL64 + : type_id::DECIMAL128; + auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ data_type, From d4506af5efbc23a0e60f92bc28359ee0779c76c5 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 23 Jul 2021 18:27:51 +0000 Subject: [PATCH 010/112] Scan, column_wrapper, orc, etc --- cpp/include/cudf_test/column_wrapper.hpp | 11 ++++++----- cpp/src/io/orc/stripe_enc.cu | 17 +++++++++++++---- cpp/src/io/orc/writer_impl.cu | 4 +++- cpp/src/io/parquet/reader_impl.cu | 6 ++++-- cpp/src/reductions/scan/scan_exclusive.cu | 8 ++++++-- cpp/src/reductions/scan/scan_inclusive.cu | 7 +++++-- 6 files changed, 37 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 341500d3c92..2de72321e0d 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -575,11 +575,12 @@ class fixed_point_column_wrapper : public detail::column_wrapper { { CUDF_EXPECTS(numeric::is_supported_representation_type(), "not valid representation type"); - auto const size = cudf::distance(begin, end); - auto const elements = thrust::host_vector(begin, end); - auto const is_decimal32 = std::is_same::value; - auto const id = is_decimal32 ? type_id::DECIMAL32 : type_id::DECIMAL64; - auto const data_type = cudf::data_type{id, static_cast(scale)}; + auto const size = cudf::distance(begin, end); + auto const elements = thrust::host_vector(begin, end); + auto const id = std::is_same::value ? type_id::DECIMAL32 + : std::is_same::value ? type_id::DECIMAL64 + : type_id::DECIMAL128; + auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ data_type, diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index e007c49e61c..e351416314b 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -109,6 +109,13 @@ static inline __device__ uint64_t zigzag(int64_t v) return ((v ^ -s) * 2) + s; } +static inline __device__ uint64_t zigzag(__int128_t v) +{ + // TODO + int64_t s = (v < 0) ? 1 : 0; + return ((v ^ -s) * 2) + s; +} + static inline __device__ uint32_t CountLeadingBytes32(uint32_t v) { return __clz(v) >> 3; } static inline __device__ uint32_t CountLeadingBytes64(uint64_t v) { return __clzll(v) >> 3; } @@ -664,7 +671,7 @@ __global__ void __launch_bounds__(block_size) if (s->present_rows < s->chunk.num_rows) { uint32_t present_rows = s->present_rows; uint32_t nrows = min(s->chunk.num_rows - present_rows, - 512 * 8 - (present_rows - (min(s->cur_row, s->present_out) & ~7))); + 512 * 8 - (present_rows - (min(s->cur_row, s->present_out) & ~7))); uint32_t nrows_out; if (t * 8 < nrows) { uint32_t row = s->chunk.start_row + present_rows + t * 8; @@ -870,9 +877,11 @@ __global__ void __launch_bounds__(block_size) break; case DECIMAL: { if (valid) { - uint64_t const zz_val = (s->chunk.leaf_column->type().id() == type_id::DECIMAL32) - ? zigzag(s->chunk.leaf_column->element(row)) - : zigzag(s->chunk.leaf_column->element(row)); + auto const id = s->chunk.leaf_column->type().id(); + uint64_t const zz_val = + id == type_id::DECIMAL32 ? zigzag(s->chunk.leaf_column->element(row)) + : id == type_id::DECIMAL64 ? zigzag(s->chunk.leaf_column->element(row)) + : zigzag(s->chunk.leaf_column->element<__int128_t>(row)); auto const offset = (row == s->chunk.start_row) ? 0 : s->chunk.decimal_offsets[row - 1]; StoreVarint(s->stream.data_ptrs[CI_DATA] + offset, zz_val); diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 0cd3f333ba3..3875f69dbef 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -94,7 +94,8 @@ constexpr orc::TypeKind to_orc_type(cudf::type_id id) case cudf::type_id::TIMESTAMP_NANOSECONDS: return TypeKind::TIMESTAMP; case cudf::type_id::STRING: return TypeKind::STRING; case cudf::type_id::DECIMAL32: - case cudf::type_id::DECIMAL64: return TypeKind::DECIMAL; + case cudf::type_id::DECIMAL64: + case cudf::type_id::DECIMAL128: return TypeKind::DECIMAL; default: return TypeKind::INVALID_TYPE_KIND; } } @@ -121,6 +122,7 @@ constexpr auto orc_precision(cudf::type_id decimal_id) switch (decimal_id) { case cudf::type_id::DECIMAL32: return 9; case cudf::type_id::DECIMAL64: return 18; + case cudf::type_id::DECIMAL128: return 38; default: return 0; } } diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 3bf11063035..92f1a19fe2d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -224,7 +224,8 @@ std::tuple conversion_info(type_id column_type_id, int8_t converted_type = converted; if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - column_type_id != type_id::DECIMAL32 && column_type_id != type_id::DECIMAL64) { + column_type_id != type_id::DECIMAL32 && column_type_id != type_id::DECIMAL64 && + column_type_id != type_id::DECIMAL128) { converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal } return std::make_tuple(type_width, clock_rate, converted_type); @@ -593,7 +594,8 @@ class aggregate_metadata { nesting.push_back(static_cast(output_columns.size())); auto const col_type = to_type_id(schema, strings_to_categorical, timestamp_type_id, strict_decimal_types); - auto const dtype = col_type == type_id::DECIMAL32 || col_type == type_id::DECIMAL64 + auto const dtype = col_type == type_id::DECIMAL32 || col_type == type_id::DECIMAL64 || + col_type == type_id::DECIMAL128 ? data_type{col_type, numeric::scale_type{-schema.decimal_scale}} : data_type{col_type}; output_columns.emplace_back(dtype, schema.repetition_type == OPTIONAL ? true : false); diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 383b64d45a1..c55cb789f7f 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -50,7 +50,9 @@ struct scan_dispatcher { * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column with scan results */ - template ::value>* = nullptr> + template ::value || + std::is_same::value>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, @@ -72,7 +74,9 @@ struct scan_dispatcher { } template - std::enable_if_t::value, std::unique_ptr> operator()(Args&&...) + std::enable_if_t::value and not std::is_same::value, + std::unique_ptr> + operator()(Args&&...) { CUDF_FAIL("Non-arithmetic types not supported for exclusive scan"); } diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 1beb9ecb282..5cd935759f4 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -124,11 +124,14 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return std::is_arithmetic::value || is_string_supported(); + return std::is_arithmetic::value || is_string_supported() || + std::is_same::value; } // for arithmetic types - template ::value>* = nullptr> + template < + typename T, + std::enable_if_t::value || std::is_same::value>* = nullptr> auto inclusive_scan(const column_view& input_view, null_policy, rmm::cuda_stream_view stream, From 791e91cc5d327b98f04b88b3c20f143fcbae817e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 23 Jul 2021 22:36:10 +0000 Subject: [PATCH 011/112] Binop changes --- cpp/src/binaryop/binaryop.cpp | 54 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index aaf193ff5cf..d6a605307a0 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -412,13 +412,19 @@ std::unique_ptr fixed_point_binary_operation(scalar const& lhs, auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (lhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const val = static_cast const&>(lhs).value(); auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); + } else { + CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const val = static_cast const&>(lhs).value(); + auto const scale = scale_type{rhs.type().scale()}; + auto const scalar = make_fixed_point_scalar(val * factor, scale); + binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); } } else { auto const diff = rhs.type().scale() - lhs.type().scale(); @@ -427,11 +433,15 @@ std::unique_ptr fixed_point_binary_operation(scalar const& lhs, auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (lhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); + } else { + CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); + return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); @@ -482,13 +492,19 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const scale = scale_type{lhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - } else { - CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (rhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const val = static_cast const&>(rhs).value(); auto const scale = scale_type{rhs.type().scale()}; auto const scalar = make_fixed_point_scalar(val * factor, scale); binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); + } else { + CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const val = static_cast const&>(rhs).value(); + auto const scale = scale_type{rhs.type().scale()}; + auto const scalar = make_fixed_point_scalar(val * factor, scale); + binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); } } else { auto const diff = lhs.type().scale() - rhs.type().scale(); @@ -497,11 +513,15 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else { - CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (rhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); + } else { + CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); + return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); @@ -550,11 +570,15 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (lhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); + } else { + CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); + return binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); @@ -565,11 +589,15 @@ std::unique_ptr fixed_point_binary_operation(column_view const& lhs, auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL64, "Unexpected DTYPE"); + } else if (lhs.type().id() == type_id::DECIMAL64) { auto const factor = numeric::detail::ipow(diff); auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); + } else { + CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); + auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); + auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); + return binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); } }(); binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); @@ -684,7 +712,7 @@ std::unique_ptr binary_operation(column_view const& lhs, auto new_mask = bitmask_and(table_view({lhs, rhs}), stream, mr); auto out = make_fixed_width_column( - output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); + output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); // Check for 0 sized data if (lhs.is_empty() or rhs.is_empty()) return out; From ad5fe3537d4aaf580bf3f51e4eb3d505584098d5 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Sat, 24 Jul 2021 21:43:00 +0000 Subject: [PATCH 012/112] detail::to_string --- cpp/include/cudf/fixed_point/fixed_point.hpp | 54 ++++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index abaecf00925..cb18bb8ef8d 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -154,6 +154,30 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T shift(T const& val, scale_type const& scal return left_shift(val, scale); } +template +auto to_string(T value) -> std::string +{ + if constexpr (std::is_same::value) { + auto s = std::string{}; + auto const sign = value < 0; + while (value) { + s.push_back("0123456789"[value % 10]); + value /= 10; + } + if (sign) s.push_back('-'); + std::reverse(s.begin(), s.end()); + return s; + } else { + return std::to_string(value); + } +} + +template +auto abs(T value) +{ + return value >= 0 ? value : -value; +} + } // namespace detail /** @@ -552,24 +576,20 @@ class fixed_point { */ explicit operator std::string() const { - if constexpr (not std::is_same::value) { - if (_scale < 0) { - auto const av = std::abs(_value); - int64_t const n = std::pow(10, -_scale); - int64_t const f = av % n; - auto const num_zeros = - std::max(0, (-_scale - static_cast(std::to_string(f).size()))); - auto const zeros = std::string(num_zeros, '0'); - auto const sign = _value < 0 ? std::string("-") : std::string(); - return sign + std::to_string(av / n) + std::string(".") + zeros + std::to_string(av % n); - } else { - auto const zeros = std::string(_scale, '0'); - return std::to_string(_value) + zeros; - } + if (_scale < 0) { + auto const av = detail::abs(_value); + Rep const n = std::pow(10, -_scale); // does this work for all values of __int128 + Rep const f = av % n; + auto const num_zeros = + std::max(0, (-_scale - static_cast(detail::to_string(f).size()))); + auto const zeros = std::string(num_zeros, '0'); + auto const sign = _value < 0 ? std::string("-") : std::string(); + return sign + detail::to_string(av / n) + std::string(".") + zeros + + detail::to_string(av % n); + } else { + auto const zeros = std::string(_scale, '0'); + return detail::to_string(_value) + zeros; } - - // std::abs and std::to_string don't work on __int128_t - return "TODO"; } }; From 7cc9db1ca5a2752be0405bdaedcf7480a49f5027 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Sat, 24 Jul 2021 21:43:13 +0000 Subject: [PATCH 013/112] Aggregation changes --- .../cudf/detail/aggregation/aggregation.cuh | 13 ++++++++----- .../cudf/detail/aggregation/aggregation.hpp | 15 ++++++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index c64fba286d4..848d52f3a44 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -593,9 +593,7 @@ struct identity_initializer { template static constexpr bool is_supported() { - // Note: !is_fixed_point() means that aggregations for fixed_point should happen on the - // underlying type (see device_storage_type_t), not that fixed_point is not supported - return cudf::is_fixed_width() && !is_fixed_point() and + return cudf::is_fixed_width() and (k == aggregation::SUM or k == aggregation::MIN or k == aggregation::MAX or k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or k == aggregation::ARGMAX or k == aggregation::ARGMIN or @@ -608,7 +606,8 @@ struct identity_initializer { std::enable_if_t, void>::value, T> identity_from_operator() { - return corresponding_operator_t::template identity(); + using DeviceType = device_storage_type_t; + return corresponding_operator_t::template identity(); } template @@ -637,7 +636,11 @@ struct identity_initializer { std::enable_if_t(), void> operator()(mutable_column_view const& col, rmm::cuda_stream_view stream) { - thrust::fill(rmm::exec_policy(stream), col.begin(), col.end(), get_identity()); + using DeviceType = device_storage_type_t; + thrust::fill(rmm::exec_policy(stream), + col.begin(), + col.end(), + get_identity()); } template diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 10d9d8c1b92..3830a6c96a2 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -923,13 +923,22 @@ struct target_type_impl< using type = int64_t; }; -// Summing fixed_point numbers, always use the decimal64 accumulator +// Summing fixed_point numbers +template +struct target_type_impl() && + not std::is_same::value && + (k == aggregation::SUM)>> { + using type = numeric::decimal64; +}; + template struct target_type_impl< Source, k, - std::enable_if_t() && (k == aggregation::SUM)>> { - using type = numeric::decimal64; + std::enable_if_t::value && (k == aggregation::SUM)>> { + using type = numeric::decimal128; }; // Summing/Multiplying float/doubles, use same type accumulator From 5dd6874bfd66348a0848a2d810f50fd584ccf5cf Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Sun, 25 Jul 2021 01:33:23 +0000 Subject: [PATCH 014/112] Small fix in fixed_point.hpp --- cpp/include/cudf/fixed_point/fixed_point.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index cb18bb8ef8d..0d0d3938588 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -157,7 +157,7 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T shift(T const& val, scale_type const& scal template auto to_string(T value) -> std::string { - if constexpr (std::is_same::value) { + if constexpr (cuda::std::is_same::value) { auto s = std::string{}; auto const sign = value < 0; while (value) { @@ -304,7 +304,7 @@ class fixed_point { */ template ::value or - std::is_same::value>* = nullptr> + cuda::std::is_same::value>* = nullptr> explicit constexpr operator U() const { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting From a89f958773aab316302c609288d9a1878220e3ba Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Sun, 25 Jul 2021 01:33:49 +0000 Subject: [PATCH 015/112] Enable quantile --- cpp/src/quantiles/quantile.cu | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 89ec7ee7a47..25bf4a436ad 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -47,8 +47,7 @@ struct quantile_functor { rmm::mr::device_memory_resource* mr; template - std::enable_if_t::value and not cudf::is_fixed_point() or - std::is_same::value, // TODO + std::enable_if_t::value and not cudf::is_fixed_point(), std::unique_ptr> operator()(column_view const& input) { @@ -56,9 +55,7 @@ struct quantile_functor { } template - std::enable_if_t::value or - cudf::is_fixed_point() and - not std::is_same::value, // TODO + std::enable_if_t::value or cudf::is_fixed_point(), std::unique_ptr> operator()(column_view const& input) { From a16a2b8e282917967b806391d04faf2473ca6ac6 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 26 Jul 2021 15:53:58 +0000 Subject: [PATCH 016/112] Comment update --- cpp/src/reductions/simple.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 61002481ddc..58443fc10f9 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -396,7 +396,7 @@ struct element_type_dispatcher { } /** - * @brief Specialization for reducing integer column types to any output type. + * @brief Specialization for reducing fixed_point column types to fixed_point number */ template ()>* = nullptr> From e89a9ba43f2d08e37fb7222a185e3e6722dfb51e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 26 Jul 2021 15:54:52 +0000 Subject: [PATCH 017/112] REDUCTION_TEST working changes --- .../detail/utilities/device_operators.cuh | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 3a1bb91b56c..0eed84880ea 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -89,7 +89,7 @@ struct DeviceMin { template CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs) { - return std::min(lhs, rhs); + return lhs < rhs ? lhs : rhs; } template < @@ -98,6 +98,14 @@ struct DeviceMin { !cudf::is_dictionary() && !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { + if constexpr (std::is_same::value) { + __int128_t max = 1; + for (int i = 0; i < 126; ++i) { + max *= 2; + } + return max + (max - 1); + } + return std::numeric_limits::max(); } @@ -128,7 +136,7 @@ struct DeviceMax { template CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs) { - return std::max(lhs, rhs); + return lhs > rhs ? lhs : rhs; } template < @@ -137,6 +145,14 @@ struct DeviceMax { !cudf::is_dictionary() && !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { + if constexpr (std::is_same::value) { + __int128_t lowest = -1; + for (int i = 0; i < 127; ++i) { + lowest *= 2; + } + return lowest; + } + return std::numeric_limits::lowest(); } From 7ef28bf706a022a16696a388458c6976ca17b368 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 26 Jul 2021 18:39:53 +0000 Subject: [PATCH 018/112] ROLLING_TEST changes --- cpp/src/aggregation/aggregation.cpp | 5 +++-- cpp/tests/rolling/rolling_test.cpp | 14 ++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 53a55351f8e..8eff18bf966 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -594,8 +594,9 @@ struct target_type_functor { constexpr data_type operator()() const noexcept { auto const id = type_to_id>(); - return id == type_id::DECIMAL32 || id == type_id::DECIMAL64 ? data_type{id, type.scale()} - : data_type{id}; + return id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128 + ? data_type{id, type.scale()} + : data_type{id}; } }; diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index a67e670acb7..19778e22f10 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -32,6 +32,7 @@ #include +#include #include using cudf::bitmask_type; @@ -1087,15 +1088,16 @@ TYPED_TEST(FixedPointTests, MinMaxCountLagLeadNulls) { using namespace numeric; using namespace cudf; - using decimalXX = TypeParam; - using RepType = cudf::device_storage_type_t; - using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using fp64_wrapper = cudf::test::fixed_point_column_wrapper; - using fw_wrapper = cudf::test::fixed_width_column_wrapper; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using sum_type = std::conditional_t, __int128_t, int64_t>; + using fpsum_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; auto const scale = scale_type{-1}; auto const input = fp_wrapper{{42, 1729, 55, 343, 1, 2}, {1, 0, 1, 0, 1, 1}, scale}; - auto const expected_sum = fp64_wrapper{{42, 97, 55, 56, 3, 3}, {1, 1, 1, 1, 1, 1}, scale}; + auto const expected_sum = fpsum_wrapper{{42, 97, 55, 56, 3, 3}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_min = fp_wrapper{{42, 42, 55, 1, 1, 1}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_max = fp_wrapper{{42, 55, 55, 55, 2, 2}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_lag = fp_wrapper{{0, 42, 1729, 55, 343, 1}, {0, 1, 0, 1, 0, 1}, scale}; From 7fd4ac41debff8cfbf5551d6fa959dcc8c98ddf6 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 26 Jul 2021 19:47:05 +0000 Subject: [PATCH 019/112] Initial changes for STRINGS_TEST --- cpp/include/cudf/fixed_point/fixed_point.hpp | 2 +- .../strings/convert/convert_fixed_point.cu | 44 +++++++++---------- cpp/src/strings/convert/utilities.cuh | 1 + 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 0d0d3938588..05ca724c358 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -173,7 +173,7 @@ auto to_string(T value) -> std::string } template -auto abs(T value) +CUDA_HOST_DEVICE_CALLABLE auto abs(T value) { return value >= 0 ? value : -value; } diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index be8993cd7ef..20dba8689d8 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -115,7 +115,7 @@ struct string_to_decimal_check_fn { return (exp_ten < scale) ? true : value <= static_cast( - std::numeric_limits::max() / + std::numeric_limits::max() / // TODO probably broken static_cast(exp10(static_cast(exp_ten - scale)))); } }; @@ -206,18 +206,16 @@ struct decimal_to_string_size_fn { if (scale >= 0) return count_digits(value) + scale; - if constexpr (not std::is_same::value) { // TODO - auto const abs_value = std::abs(value); - auto const exp_ten = static_cast(exp10(static_cast(-scale))); - auto const fraction = count_digits(abs_value % exp_ten); - auto const num_zeros = std::max(0, (-scale - fraction)); - return static_cast(value < 0) + // sign if negative - count_digits(abs_value / exp_ten) + // integer - 1 + // decimal point - num_zeros + // zeros padding - fraction; // size of fraction - } - return 0; + auto const abs_value = numeric::detail::abs(value); + auto const exp_ten = static_cast(exp10( + static_cast(-scale))); // TODO probably broken (might need numeric::detail::exp10) + auto const fraction = count_digits(abs_value % exp_ten); + auto const num_zeros = std::max(0, (-scale - fraction)); + return static_cast(value < 0) + // sign if negative + count_digits(abs_value / exp_ten) + // integer + 1 + // decimal point + num_zeros + // zeros padding + fraction; // size of fraction } }; @@ -250,20 +248,18 @@ struct decimal_to_string_fn { // write format: [-]integer.fraction // where integer = abs(value) / (10^abs(scale)) // fraction = abs(value) % (10^abs(scale)) - if constexpr (not std::is_same::value) { // TODO fix - auto const abs_value = std::abs(value); - if (value < 0) *d_buffer++ = '-'; // add sign - auto const exp_ten = static_cast(exp10(static_cast(-scale))); - auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); + auto const abs_value = numeric::detail::abs(value); + if (value < 0) *d_buffer++ = '-'; // add sign + auto const exp_ten = static_cast(exp10(static_cast(-scale))); + auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); - d_buffer += integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part - *d_buffer++ = '.'; // add decimal point + d_buffer += integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part + *d_buffer++ = '.'; // add decimal point - thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros - d_buffer += num_zeros; + thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros + d_buffer += num_zeros; - integer_to_string(abs_value % exp_ten, d_buffer); // add the fraction part - } + integer_to_string(abs_value % exp_ten, d_buffer); // add the fraction part } }; diff --git a/cpp/src/strings/convert/utilities.cuh b/cpp/src/strings/convert/utilities.cuh index 746923526a1..66606314261 100644 --- a/cpp/src/strings/convert/utilities.cuh +++ b/cpp/src/strings/convert/utilities.cuh @@ -96,6 +96,7 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer) template constexpr size_type count_digits(IntegerType value) { + // TODO definitely broken if (value == 0) return 1; bool is_negative = std::is_signed::value ? (value < 0) : false; // abs(std::numeric_limits::min()) is negative; From 016c35aed3d40277efb1023a2a8d7883df04ceee Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 06:26:09 +0000 Subject: [PATCH 020/112] STRINGS changes --- .../cudf/detail/utilities/integer_utils.hpp | 12 +- cpp/include/cudf/fixed_point/fixed_point.hpp | 25 +--- cpp/include/cudf/fixed_point/temporary.hpp | 109 ++++++++++++++++++ .../strings/convert/convert_fixed_point.cu | 2 +- cpp/src/strings/convert/utilities.cuh | 6 +- 5 files changed, 118 insertions(+), 36 deletions(-) create mode 100644 cpp/include/cudf/fixed_point/temporary.hpp diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index dc919433da7..365ee1e91f4 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -22,6 +22,8 @@ * @file Utility code involving integer arithmetic */ +#include + #include #include #include @@ -151,17 +153,11 @@ constexpr inline bool is_a_power_of_two(I val) noexcept * @return Absolute value if value type is signed. */ template -std::enable_if_t::value, T> constexpr inline absolute_value(T value) -{ - return std::abs(value); -} -// Unsigned type just returns itself. -template -std::enable_if_t::value, T> constexpr inline absolute_value(T value) +constexpr inline auto absolute_value(T value) -> T { + if constexpr (numeric::detail::is_signed()) return numeric::detail::abs(value); return value; } } // namespace util - } // namespace cudf diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 05ca724c358..229f39f9389 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include // Note: The versions are used in order for Jitify to work with our fixed_point type. @@ -154,30 +155,6 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T shift(T const& val, scale_type const& scal return left_shift(val, scale); } -template -auto to_string(T value) -> std::string -{ - if constexpr (cuda::std::is_same::value) { - auto s = std::string{}; - auto const sign = value < 0; - while (value) { - s.push_back("0123456789"[value % 10]); - value /= 10; - } - if (sign) s.push_back('-'); - std::reverse(s.begin(), s.end()); - return s; - } else { - return std::to_string(value); - } -} - -template -CUDA_HOST_DEVICE_CALLABLE auto abs(T value) -{ - return value >= 0 ? value : -value; -} - } // namespace detail /** diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp new file mode 100644 index 00000000000..3c487a28d74 --- /dev/null +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +// Note: The versions are used in order for Jitify to work with our fixed_point type. +// Jitify is needed for several algorithms (binaryop, rolling, etc) +#include +#include // add cuda namespace + +#include +#include +#include +#include + +//! `fixed_point` and supporting types +namespace numeric { +namespace detail { +namespace numeric_limits { + +template +auto max() -> T +{ + if constexpr (std::is_same_v) { + // −170,141,183,460,469,231,731,687,303,715,884,105,728 + __int128_t max = 1; + for (int i = 0; i < 126; ++i) { + max *= 2; + } + return max + (max - 1); + } + + return std::numeric_limits::max(); +} + +template +auto lowest() -> T +{ + if constexpr (std::is_same_v) { + // 170,141,183,460,469,231,731,687,303,715,884,105,728 + __int128_t lowest = -1; + for (int i = 0; i < 127; ++i) { + lowest *= 2; + } + return lowest; + } + + return std::numeric_limits::lowest(); +} + +} // namespace numeric_limits + +template +auto to_string(T value) -> std::string +{ + if constexpr (cuda::std::is_same::value) { + auto s = std::string{}; + auto const sign = value < 0; + if (sign) { + value += 1; // avoid overflowing if value == _int128_t lowest + value *= -1; + if (value == detail::numeric_limits::max<__int128_t>()) + return "-170141183460469231731687303715884105728"; + value += 1; // can add back the one, not need to avoid overflow anymore + } + while (value) { + s.push_back("0123456789"[value % 10]); + value /= 10; + } + if (sign) s.push_back('-'); + std::reverse(s.begin(), s.end()); + return s; + } else { + return std::to_string(value); + } +} + +template +CUDA_HOST_DEVICE_CALLABLE constexpr auto abs(T value) +{ + return value >= 0 ? value : -value; +} + +template +CUDA_HOST_DEVICE_CALLABLE constexpr auto is_signed() +{ + return std::is_signed::value || std::is_same_v; +} + +} // namespace detail + +/** @} */ // end of group +} // namespace numeric diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 20dba8689d8..9d0a6a3fdd2 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -248,8 +248,8 @@ struct decimal_to_string_fn { // write format: [-]integer.fraction // where integer = abs(value) / (10^abs(scale)) // fraction = abs(value) % (10^abs(scale)) - auto const abs_value = numeric::detail::abs(value); if (value < 0) *d_buffer++ = '-'; // add sign + auto const abs_value = numeric::detail::abs(value); auto const exp_ten = static_cast(exp10(static_cast(-scale))); auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); diff --git a/cpp/src/strings/convert/utilities.cuh b/cpp/src/strings/convert/utilities.cuh index 66606314261..6a6c92ba7c7 100644 --- a/cpp/src/strings/convert/utilities.cuh +++ b/cpp/src/strings/convert/utilities.cuh @@ -64,8 +64,8 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer) *d_buffer = '0'; return 1; } - bool const is_negative = std::is_signed::value ? (value < 0) : false; - // + bool const is_negative = numeric::detail::is_signed() ? (value < 0) : false; + constexpr IntegerType base = 10; constexpr int MAX_DIGITS = 20; // largest 64-bit integer is 20 digits char digits[MAX_DIGITS]; // place-holder for digit chars @@ -98,7 +98,7 @@ constexpr size_type count_digits(IntegerType value) { // TODO definitely broken if (value == 0) return 1; - bool is_negative = std::is_signed::value ? (value < 0) : false; + bool is_negative = numeric::detail::is_signed() ? (value < 0) : false; // abs(std::numeric_limits::min()) is negative; // for all integer types, the max() and min() values have the same number of digits value = (value == std::numeric_limits::min()) From dbd050429adff62d945fb71852c9ca42f543ba0e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 07:23:25 +0000 Subject: [PATCH 021/112] Clean up --- .../detail/utilities/device_operators.cuh | 5 ++-- cpp/include/cudf/fixed_point/temporary.hpp | 24 +++++++++++++++---- cpp/src/unary/math_ops.cu | 6 +---- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 0eed84880ea..57065989df9 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -89,7 +90,7 @@ struct DeviceMin { template CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs) { - return lhs < rhs ? lhs : rhs; + return numeric::detail::min(lhs, rhs); } template < @@ -136,7 +137,7 @@ struct DeviceMax { template CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs) { - return lhs > rhs ? lhs : rhs; + return numeric::detail::max(lhs, rhs); } template < diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 3c487a28d74..e3e598daa55 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -38,7 +38,7 @@ template auto max() -> T { if constexpr (std::is_same_v) { - // −170,141,183,460,469,231,731,687,303,715,884,105,728 + // 170,141,183,460,469,231,731,687,303,715,884,105,727 __int128_t max = 1; for (int i = 0; i < 126; ++i) { max *= 2; @@ -53,7 +53,7 @@ template auto lowest() -> T { if constexpr (std::is_same_v) { - // 170,141,183,460,469,231,731,687,303,715,884,105,728 + // -170,141,183,460,469,231,731,687,303,715,884,105,728 __int128_t lowest = -1; for (int i = 0; i < 127; ++i) { lowest *= 2; @@ -92,17 +92,33 @@ auto to_string(T value) -> std::string } template -CUDA_HOST_DEVICE_CALLABLE constexpr auto abs(T value) +constexpr auto abs(T value) { return value >= 0 ? value : -value; } template -CUDA_HOST_DEVICE_CALLABLE constexpr auto is_signed() +CUDA_HOST_DEVICE_CALLABLE auto min(T lhs, T rhs) +{ + return lhs < rhs ? lhs : rhs; +} + +template +CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) +{ + return lhs > rhs ? lhs : rhs; +} + +template +constexpr auto is_signed() { return std::is_signed::value || std::is_same_v; } +// TODO add is_integral +// TODO add is_arithmetic +// TODO pull down upstream, then regex replace is_same<>::value + } // namespace detail /** @} */ // end of group diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 5c44ca3e445..6359c50c21a 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -271,11 +271,7 @@ struct fixed_point_floor { template struct fixed_point_abs { T n; - __device__ T operator()(T data) - { - // std::abs does not work for __int128_t - return data > 0 ? data : data * -1; - } + __device__ T operator()(T data) { return numeric::detail::abs(data); } }; template typename FixedPointFunctor> From bf34d20b545c239dc12e6cf2544e488dfab6ac12 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 14:57:02 +0000 Subject: [PATCH 022/112] std::is_same_v --- .../cudf/column/column_device_view.cuh | 2 +- .../cudf/detail/aggregation/aggregation.cuh | 12 +++++----- cpp/include/cudf/detail/copy_if.cuh | 2 +- .../detail/utilities/device_operators.cuh | 4 ++-- cpp/include/cudf/fixed_point/fixed_point.hpp | 6 ++--- cpp/include/cudf/fixed_point/temporary.hpp | 6 ++--- cpp/include/cudf/utilities/traits.hpp | 2 +- cpp/src/reductions/scan/scan_exclusive.cu | 4 ++-- cpp/src/reductions/scan/scan_inclusive.cu | 4 ++-- cpp/src/round/round.cu | 24 +++++++++---------- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 6aa6eaf7de8..07e05083734 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -465,7 +465,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @param element_index Position of the desired element * @return numeric::decimal128 representing the element at this index */ - template ::value)> + template )> __device__ T element(size_type element_index) const noexcept { using namespace numeric; diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 848d52f3a44..af76f07af16 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -139,7 +139,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { using Target = target_type_t; atomicMin(&target.element(target_index), static_cast(source.element(source_index))); @@ -166,7 +166,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { atomicMin(&target.element(target_index), static_cast(source.element(source_index))); } @@ -189,7 +189,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { using Target = target_type_t; atomicMax(&target.element(target_index), static_cast(source.element(source_index))); @@ -216,7 +216,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { atomicMax(&target.element(target_index), static_cast(source.element(source_index))); } @@ -239,7 +239,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { using Target = target_type_t; atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); @@ -266,7 +266,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same::value) { + if constexpr (not std::is_same_v) { atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); } diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index ca2592eab3d..06efcaedf6c 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -224,7 +224,7 @@ struct DeviceType>> { }; template -struct DeviceType::value>> { +struct DeviceType>> { using type = typename cudf::device_storage_type_t; }; diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 5d6ec6e94bf..90449982cc2 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -99,7 +99,7 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { __int128_t max = 1; for (int i = 0; i < 126; ++i) { max *= 2; @@ -145,7 +145,7 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { __int128_t lowest = -1; for (int i = 0; i < 127; ++i) { lowest *= 2; diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 08dbb20a794..8dcc2952bbe 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -49,7 +49,7 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 }; template constexpr inline auto is_supported_representation_type() { - return cuda::std::is_same_v || cuda::std::is_same || + return cuda::std::is_same_v || cuda::std::is_same_v || cuda::std::is_same_v; } @@ -58,7 +58,7 @@ template constexpr inline auto is_supported_construction_value_type() { return cuda::std::is_integral::value || cuda::std::is_floating_point::value || - cuda::std::is_same::value; + cuda::std::is_same_v; } // Helper functions for `fixed_point` type @@ -281,7 +281,7 @@ class fixed_point { */ template ::value or - cuda::std::is_same::value>* = nullptr> + cuda::std::is_same_v>* = nullptr> explicit constexpr operator U() const { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index e3e598daa55..82a12540001 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -32,7 +32,7 @@ //! `fixed_point` and supporting types namespace numeric { namespace detail { -namespace numeric_limits { +namespace numeric_limits { // TODO switch this to struct template auto max() -> T @@ -69,7 +69,7 @@ auto lowest() -> T template auto to_string(T value) -> std::string { - if constexpr (cuda::std::is_same::value) { + if constexpr (cuda::std::is_same_v) { auto s = std::string{}; auto const sign = value < 0; if (sign) { @@ -117,7 +117,7 @@ constexpr auto is_signed() // TODO add is_integral // TODO add is_arithmetic -// TODO pull down upstream, then regex replace is_same<>::value +// TODO pull down upstream, then regex replace is_same_v<> } // namespace detail diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index e6d50442cc6..a0d654b4307 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -153,7 +153,7 @@ template constexpr inline bool is_numeric() { return std::is_integral::value or std::is_floating_point::value or - std::is_same::value; + std::is_same_v; } struct is_numeric_impl { diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 4e40e47538f..664d3bcebad 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -52,7 +52,7 @@ struct scan_dispatcher { */ template ::value || - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, @@ -74,7 +74,7 @@ struct scan_dispatcher { } template - std::enable_if_t::value and not std::is_same::value, + std::enable_if_t::value and not std::is_same_v, std::unique_ptr> operator()(Args&&...) { diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index cc5107f591a..aa075cd5543 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -123,13 +123,13 @@ struct scan_dispatcher { static constexpr bool is_supported() { return std::is_arithmetic::value || is_string_supported() || - std::is_same::value; + std::is_same_v; } // for arithmetic types template < typename T, - std::enable_if_t::value || std::is_same::value>* = nullptr> + std::enable_if_t::value || std::is_same_v>* = nullptr> auto inclusive_scan(column_view const& input_view, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 559735cc8d3..d974da7353a 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -86,9 +86,9 @@ struct half_up_zero { return generic_round(e); } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -107,9 +107,9 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -126,9 +126,9 @@ struct half_up_negative { return generic_round(e / n) * n; } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -145,9 +145,9 @@ struct half_even_zero { return generic_round_half_even(e); } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -166,9 +166,9 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -185,9 +185,9 @@ struct half_even_negative { return generic_round_half_even(e / n) * n; } - template ::value or - std::is_same::value>* = nullptr> + std::is_same_v>* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case From 103a4db3e2c4ad66b92d2031e2a666e2aa455a34 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 15:51:57 +0000 Subject: [PATCH 023/112] is_integral & is_arithmetic --- cpp/include/cudf/fixed_point/fixed_point.hpp | 6 ++---- cpp/include/cudf/fixed_point/temporary.hpp | 19 +++++++++++++------ cpp/include/cudf/utilities/traits.hpp | 3 +-- cpp/src/reductions/scan/scan_exclusive.cu | 9 +++------ cpp/src/reductions/scan/scan_inclusive.cu | 7 ++----- cpp/src/round/round.cu | 16 ++++------------ 6 files changed, 25 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 8dcc2952bbe..f4254ffe4ba 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -57,8 +57,7 @@ constexpr inline auto is_supported_representation_type() template constexpr inline auto is_supported_construction_value_type() { - return cuda::std::is_integral::value || cuda::std::is_floating_point::value || - cuda::std::is_same_v; + return numeric::detail::is_integral() || cuda::std::is_floating_point::value; } // Helper functions for `fixed_point` type @@ -280,8 +279,7 @@ class fixed_point { * @return The `fixed_point` number in base 10 (aka human readable format) */ template ::value or - cuda::std::is_same_v>* = nullptr> + typename cuda::std::enable_if_t()>* = nullptr> explicit constexpr operator U() const { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 82a12540001..8a33ec498ee 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -27,9 +27,10 @@ #include #include #include +#include #include +#include -//! `fixed_point` and supporting types namespace numeric { namespace detail { namespace numeric_limits { // TODO switch this to struct @@ -115,11 +116,17 @@ constexpr auto is_signed() return std::is_signed::value || std::is_same_v; } -// TODO add is_integral -// TODO add is_arithmetic -// TODO pull down upstream, then regex replace is_same_v<> +template +constexpr auto is_integral() +{ + return cuda::std::is_integral::value || cuda::std::is_same_v; +} -} // namespace detail +template +constexpr auto is_arithmetic() +{ + return numeric::detail::is_integral() || cuda::std::is_floating_point_v; +} -/** @} */ // end of group +} // namespace detail } // namespace numeric diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index a0d654b4307..dbb06865f20 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -152,8 +152,7 @@ constexpr inline bool is_equality_comparable() template constexpr inline bool is_numeric() { - return std::is_integral::value or std::is_floating_point::value or - std::is_same_v; + return numeric::detail::is_integral() or std::is_floating_point::value; } struct is_numeric_impl { diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 664d3bcebad..5c3810743a9 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -50,9 +50,7 @@ struct scan_dispatcher { * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column with scan results */ - template ::value || - std::is_same_v>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, @@ -74,9 +72,8 @@ struct scan_dispatcher { } template - std::enable_if_t::value and not std::is_same_v, - std::unique_ptr> - operator()(Args&&...) + std::enable_if_t(), std::unique_ptr> operator()( + Args&&...) { CUDF_FAIL("Non-arithmetic types not supported for exclusive scan"); } diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index aa075cd5543..8ffcf85a492 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -122,14 +122,11 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return std::is_arithmetic::value || is_string_supported() || - std::is_same_v; + return numeric::detail::is_arithmetic::value || is_string_supported(); } // for arithmetic types - template < - typename T, - std::enable_if_t::value || std::is_same_v>* = nullptr> + template ()>* = nullptr> auto inclusive_scan(column_view const& input_view, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index d974da7353a..d79e60bfb53 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -126,9 +126,7 @@ struct half_up_negative { return generic_round(e / n) * n; } - template ::value or - std::is_same_v>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -145,9 +143,7 @@ struct half_even_zero { return generic_round_half_even(e); } - template ::value or - std::is_same_v>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -166,9 +162,7 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template ::value or - std::is_same_v>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -185,9 +179,7 @@ struct half_even_negative { return generic_round_half_even(e / n) * n; } - template ::value or - std::is_same_v>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case From 575fca771d18afd9ca8d41ffbed26706795674ca Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 15:58:17 +0000 Subject: [PATCH 024/112] Clean up --- cpp/src/round/round.cu | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index d79e60bfb53..e5a4961b3c1 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -86,9 +86,7 @@ struct half_up_zero { return generic_round(e); } - template ::value or - std::is_same_v>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -107,10 +105,8 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template ::value or - std::is_same_v>* = nullptr> - __device__ U operator()(U) + template ()>> + * = nullptr > __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation return U{}; From 85497531140b31ff148cc8f8c203e8295059deba Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 17:28:16 +0000 Subject: [PATCH 025/112] Fixes / cleanup --- cpp/src/quantiles/quantiles_util.hpp | 6 +++++- cpp/src/reductions/scan/scan_inclusive.cu | 2 +- cpp/src/round/round.cu | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/src/quantiles/quantiles_util.hpp b/cpp/src/quantiles/quantiles_util.hpp index 1df0a4ab41a..0ab047bf97c 100644 --- a/cpp/src/quantiles/quantiles_util.hpp +++ b/cpp/src/quantiles/quantiles_util.hpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace cudf { namespace detail { @@ -152,7 +153,10 @@ select_quantile(ValueAccessor get_value, size_type size, double q, interpolation } } -template +template ()>* = + nullptr> // TODO revisit if this is needed CUDA_HOST_DEVICE_CALLABLE Result select_quantile_data(Iterator begin, size_type size, double q, interpolation interp) { diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 8ffcf85a492..5ba500b10a7 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -122,7 +122,7 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return numeric::detail::is_arithmetic::value || is_string_supported(); + return numeric::detail::is_arithmetic() || is_string_supported(); } // for arithmetic types diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index e5a4961b3c1..ab2acc91c9d 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -105,8 +105,8 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template ()>> - * = nullptr > __device__ U operator()(U) + template ()>* = nullptr> + __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation return U{}; From 22de55a7eed5517f8b6aee99ba29170e51c79a5b Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 19:52:48 +0000 Subject: [PATCH 026/112] DECIMAL128 custom reduction tests --- cpp/tests/reductions/reduction_tests.cpp | 45 ++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index da9032737f2..69718259d00 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1374,7 +1374,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionQuantile) for (auto const i : {0, 1, 2, 3, 4}) { auto const expected = decimalXX{scaled_integer{i + 1, scale}}; auto const result = cudf::reduce( - column, cudf::make_quantile_aggregation({i / 4.0}, cudf::interpolation::LINEAR), out_type); + column, cudf::make_quantile_aggregation({i / 4.0}, cudf::interpolation::LINEAR), out_type); auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1397,13 +1397,54 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNthElement) for (auto const i : {0, 1, 2, 3}) { auto const expected = decimalXX{scaled_integer{values[i], scale}}; auto const result = cudf::reduce( - column, cudf::make_nth_element_aggregation(i, cudf::null_policy::INCLUDE), out_type); + column, cudf::make_nth_element_aggregation(i, cudf::null_policy::INCLUDE), out_type); auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } } } +struct Decimal128Only : public cudf::test::BaseFixture { +}; + +TEST_F(Decimal128Only, Decimal128ProductReduction) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + for (auto const i : {0, -1, -2, -3}) { + auto const scale = scale_type{i}; + auto const column = fp_wrapper{{2, 2, 2, 2, 2, 2, 2, 2, 2}, scale}; + auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; + auto const expected = decimal128{scaled_integer{512, scale_type{i * 9}}}; + + auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); + auto const result_scalar = static_cast*>(result.get()); + + EXPECT_EQ(result_scalar->fixed_point_value(), expected); + } +} + +TEST_F(Decimal128Only, Decimal128ProductReduction2) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + for (auto const i : {0, -1, -2, -3, -4, -5, -6}) { + auto const scale = scale_type{i}; + auto const column = fp_wrapper{{1, 2, 3, 4, 5, 6}, scale}; + auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; + auto const expected = decimal128{scaled_integer{720, scale_type{i * 6}}}; + + auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); + auto const result_scalar = static_cast*>(result.get()); + + EXPECT_EQ(result_scalar->fixed_point_value(), expected); + } +} + TYPED_TEST(ReductionTest, NthElement) { using T = TypeParam; From 5b69c0c82a41046ece38f25a7c41e5d7c6b2aa00 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 27 Jul 2021 22:03:05 +0000 Subject: [PATCH 027/112] Another REDUCTION test --- cpp/tests/reductions/reduction_tests.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 69718259d00..37dbb913781 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1416,9 +1416,9 @@ TEST_F(Decimal128Only, Decimal128ProductReduction) for (auto const i : {0, -1, -2, -3}) { auto const scale = scale_type{i}; auto const column = fp_wrapper{{2, 2, 2, 2, 2, 2, 2, 2, 2}, scale}; - auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; auto const expected = decimal128{scaled_integer{512, scale_type{i * 9}}}; + auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); auto const result_scalar = static_cast*>(result.get()); @@ -1435,9 +1435,9 @@ TEST_F(Decimal128Only, Decimal128ProductReduction2) for (auto const i : {0, -1, -2, -3, -4, -5, -6}) { auto const scale = scale_type{i}; auto const column = fp_wrapper{{1, 2, 3, 4, 5, 6}, scale}; - auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; auto const expected = decimal128{scaled_integer{720, scale_type{i * 6}}}; + auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); auto const result_scalar = static_cast*>(result.get()); @@ -1445,6 +1445,25 @@ TEST_F(Decimal128Only, Decimal128ProductReduction2) } } +TEST_F(Decimal128Only, Decimal128ProductReduction3) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const values = std::vector(127, -2); + auto const scale = scale_type{0}; + auto const column = fp_wrapper{values.cbegin(), values.cend(), scale}; + auto const lowest = numeric::detail::numeric_limits::lowest(); + auto const expected = decimal128{scaled_integer{lowest, scale}}; + + auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; + auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); + auto const result_scalar = static_cast*>(result.get()); + + EXPECT_EQ(result_scalar->fixed_point_value(), expected); +} + TYPED_TEST(ReductionTest, NthElement) { using T = TypeParam; From 95667c85da9ba7bf37ed93021cb804bf98a4edb7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 28 Jul 2021 21:00:00 +0000 Subject: [PATCH 028/112] numeric_limits / temporary cleanup --- .../detail/utilities/device_operators.cuh | 20 ++----------------- cpp/include/cudf/fixed_point/temporary.hpp | 19 ++++++------------ 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 90449982cc2..535f8d52ab4 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -99,15 +99,7 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (std::is_same_v) { - __int128_t max = 1; - for (int i = 0; i < 126; ++i) { - max *= 2; - } - return max + (max - 1); - } - - return std::numeric_limits::max(); + return numeric::detail::numeric_limits::max(); } template ()>* = nullptr> @@ -145,15 +137,7 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (std::is_same_v) { - __int128_t lowest = -1; - for (int i = 0; i < 127; ++i) { - lowest *= 2; - } - return lowest; - } - - return std::numeric_limits::lowest(); + return numeric::detail::numeric_limits::lowest(); } template ()>* = nullptr> diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 8a33ec498ee..9af205d8bb4 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -16,34 +16,28 @@ #pragma once -#include #include // Note: The versions are used in order for Jitify to work with our fixed_point type. // Jitify is needed for several algorithms (binaryop, rolling, etc) -#include -#include // add cuda namespace +#include #include -#include -#include #include #include -#include namespace numeric { namespace detail { -namespace numeric_limits { // TODO switch this to struct +namespace numeric_limits { template -auto max() -> T +static constexpr auto max() -> T { if constexpr (std::is_same_v) { // 170,141,183,460,469,231,731,687,303,715,884,105,727 __int128_t max = 1; - for (int i = 0; i < 126; ++i) { + for (int i = 0; i < 126; ++i) max *= 2; - } return max + (max - 1); } @@ -51,14 +45,13 @@ auto max() -> T } template -auto lowest() -> T +static constexpr auto lowest() -> T { if constexpr (std::is_same_v) { // -170,141,183,460,469,231,731,687,303,715,884,105,728 __int128_t lowest = -1; - for (int i = 0; i < 127; ++i) { + for (int i = 0; i < 127; ++i) lowest *= 2; - } return lowest; } From 825ab86c183ccd4eb89858242262ffcd29921a54 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 29 Jul 2021 03:43:18 +0000 Subject: [PATCH 029/112] More changes, 10+ files --- .../cudf/detail/aggregation/aggregation.cuh | 8 +++++--- .../cudf/detail/utilities/hash_functions.cuh | 14 ++++++++++++++ cpp/include/cudf/fixed_point/temporary.hpp | 3 ++- cpp/include/cudf/scalar/scalar.hpp | 2 +- cpp/include/cudf/utilities/type_dispatcher.hpp | 15 ++++++++++++--- cpp/src/aggregation/aggregation.cpp | 7 +++---- cpp/src/io/orc/writer_impl.cu | 7 ++++--- cpp/src/io/parquet/reader_impl.cu | 6 ++---- cpp/src/io/parquet/writer_impl.cu | 4 ++++ cpp/src/reductions/scan/scan.cuh | 2 +- cpp/src/reductions/simple.cuh | 2 +- cpp/src/transform/row_bit_count.cu | 2 +- cpp/src/unary/cast_ops.cu | 8 ++++---- 13 files changed, 54 insertions(+), 26 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index af76f07af16..e05e83991cd 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -624,9 +624,11 @@ struct identity_initializer { if constexpr (cudf::is_timestamp()) return k == aggregation::ARGMAX ? T{typename T::duration(ARGMAX_SENTINEL)} : T{typename T::duration(ARGMIN_SENTINEL)}; - else - return k == aggregation::ARGMAX ? static_cast(ARGMAX_SENTINEL) - : static_cast(ARGMIN_SENTINEL); + else { + using DeviceType = device_storage_type_t; + return k == aggregation::ARGMAX ? static_cast(ARGMAX_SENTINEL) + : static_cast(ARGMIN_SENTINEL); + } } return identity_from_operator(); } diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 6eab13ae9af..a28827b05d2 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -562,6 +562,13 @@ MurmurHash3_32::operator()(numeric::decimal64 const& key) co return this->compute(key.value()); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal128 const& key) const +{ + return this->compute(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(cudf::list_view const& key) const @@ -707,6 +714,13 @@ SparkMurmurHash3_32::operator()(numeric::decimal64 const& ke return this->compute(key.value()); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +SparkMurmurHash3_32::operator()(numeric::decimal128 const& key) const +{ + return this->compute<__int128_t>(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE SparkMurmurHash3_32::operator()(cudf::list_view const& key) const diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 9af205d8bb4..12b10fee91d 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -71,7 +71,7 @@ auto to_string(T value) -> std::string value *= -1; if (value == detail::numeric_limits::max<__int128_t>()) return "-170141183460469231731687303715884105728"; - value += 1; // can add back the one, not need to avoid overflow anymore + value += 1; // can add back the one, no need to avoid overflow anymore } while (value) { s.push_back("0123456789"[value % 10]); @@ -83,6 +83,7 @@ auto to_string(T value) -> std::string } else { return std::to_string(value); } + return std::string{}; // won't ever hit here, need to supress warning though } template diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 0e14b0c6bf5..7bf92fd6520 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -359,7 +359,7 @@ class fixed_point_scalar : public scalar { rep_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** - * @brief Get the decimal32 or decimal64. + * @brief Get the decimal32, decimal64 or decimal128. * * @param stream CUDA stream used for device memory operations. */ diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 07daf8d0417..e0e7254588d 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -85,8 +85,9 @@ using id_to_type = typename id_to_type_impl::type; /** * @brief "Returns" the corresponding type that is stored on the device when using `cudf::column` * - * For `decimal32`, the storage type is an `int32_t`. - * For `decimal64`, the storage type is an `int64_t`. + * For `decimal32`, the storage type is an `int32_t`. + * For `decimal64`, the storage type is an `int64_t`. + * For `decimal128`, the storage type is an `__int128_t`. * * Use this "type function" with the `using` type alias: * @code @@ -113,13 +114,21 @@ using device_storage_type_t = * @return `false` If T does not match the stored column `type_id` */ template -bool type_id_matches_device_storage_type(type_id id) +constexpr bool type_id_matches_device_storage_type(type_id id) { return (id == type_id::DECIMAL32 && std::is_same_v) || (id == type_id::DECIMAL64 && std::is_same_v) || (id == type_id::DECIMAL128 && std::is_same_v) || id == type_to_id(); } +// TODO docs +constexpr bool is_fixed_point(cudf::type_id id) +{ + return id == type_id::DECIMAL32 or // + id == type_id::DECIMAL64 or // + id == type_id::DECIMAL128; +} + /** * @brief Macro used to define a mapping between a concrete C++ type and a *`cudf::type_id` enum. diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 687477658f5..f4628d500bb 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -631,10 +631,9 @@ struct target_type_functor { template constexpr data_type operator()() const noexcept { - auto const id = type_to_id>(); - return id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128 - ? data_type{id, type.scale()} - : data_type{id}; + using Type = target_type_t; + auto const id = type_to_id(); + return cudf::is_fixed_point() ? data_type{id, type.scale()} : data_type{id}; } }; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 7d7548c8858..9181a4dcc4c 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1270,9 +1270,10 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, col_idx = orc_col.index()] __device__(auto idx) { auto const& col = d_cols[col_idx].cudf_column; if (col.is_null(idx)) return 0u; - int64_t const element = (col.type().id() == type_id::DECIMAL32) - ? col.element(idx) - : col.element(idx); + int64_t const element = + col.type().id() == type_id::DECIMAL32 ? col.element(idx) + : col.type().id() == type_id::DECIMAL64 ? col.element(idx) + : col.element<__int128_t>(idx); int64_t const sign = (element < 0) ? 1 : 0; uint64_t zigzaged_value = ((element ^ -sign) * 2) + sign; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index ec8041c933e..16dbcda958d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -224,8 +224,7 @@ std::tuple conversion_info(type_id column_type_id, int8_t converted_type = converted; if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - column_type_id != type_id::DECIMAL32 && column_type_id != type_id::DECIMAL64 && - column_type_id != type_id::DECIMAL128) { + not cudf::is_fixed_point(column_type_id)) { converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal } return std::make_tuple(type_width, clock_rate, converted_type); @@ -594,8 +593,7 @@ class aggregate_metadata { nesting.push_back(static_cast(output_columns.size())); auto const col_type = to_type_id(schema, strings_to_categorical, timestamp_type_id, strict_decimal_types); - auto const dtype = col_type == type_id::DECIMAL32 || col_type == type_id::DECIMAL64 || - col_type == type_id::DECIMAL128 + auto const dtype = cudf::is_fixed_point(col_type) ? data_type{col_type, numeric::scale_type{-schema.decimal_scale}} : data_type{col_type}; output_columns.emplace_back(dtype, schema.repetition_type == OPTIONAL ? true : false); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 7c0ce03886d..b0ac3ccf4c7 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -339,6 +339,10 @@ struct leaf_schema_fn { } else if (std::is_same_v) { col_schema.type = Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_decimal64; + } else if (std::is_same_v) { + // TODO + // col_schema.type = Type::INT64; + // col_schema.stats_dtype = statistics_dtype::dtype_decimal64; } else { CUDF_FAIL("Unsupported fixed point type for parquet writer"); } diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh index 3853e34e97b..cba27c0cd54 100644 --- a/cpp/src/reductions/scan/scan.cuh +++ b/cpp/src/reductions/scan/scan.cuh @@ -67,7 +67,7 @@ std::unique_ptr scan_agg_dispatch(const column_view& input, case aggregation::PRODUCT: // a product scan on a decimal type with non-zero scale would result in each element having // a different scale, and because scale is stored once per column, this is not possible - if (is_fixed_point(input.type())) CUDF_FAIL("decimal32/64 cannot support product scan"); + if (is_fixed_point(input.type())) CUDF_FAIL("decimal32/64/128 cannot support product scan"); return type_dispatcher( input.type(), DispatchFn(), input, null_handling, stream, mr); case aggregation::RANK: return inclusive_rank_scan(input, stream, mr); diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 5aa42dbda74..6cbfb220a9d 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -74,7 +74,7 @@ std::unique_ptr simple_reduction(column_view const& col, /** * @brief Reduction for `sum`, `product`, `min` and `max` for decimal types * - * @tparam DecimalXX The `decimal32` or `decimal64` type + * @tparam DecimalXX The `decimal32`, `decimal64` or `decimal128` type * @tparam Op The operator of cudf::reduction::op:: * @param col Input column of data to reduce diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 620504f5c93..a129fc56846 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -205,7 +205,7 @@ struct flatten_functor { thrust::optional parent_index) { // track branch depth as we reach this list and after we pass it - size_type const branch_depth_start = cur_branch_depth; + auto const branch_depth_start = cur_branch_depth; auto const is_list_inside_struct = parent_index && out[parent_index.value()].type().id() == type_id::STRUCT; if (is_list_inside_struct) { diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index d01d0a8cbbc..f201667cfd0 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -160,7 +160,7 @@ struct device_cast { * @brief Takes a `fixed_point` column_view as @p input and returns a `fixed_point` column with new * @p scale * - * @tparam T Type of the `fixed_point` column_view (`decimal32` or `decimal64`) + * @tparam T Type of the `fixed_point` column_view (`decimal32`, `decimal64` or `decimal128`) * @param input Input `column_view` * @param scale `scale` of the returned `column` * @param mr Device memory resource used to allocate the returned column's device memory @@ -338,9 +338,9 @@ struct dispatch_unary_cast_to { { if (!cudf::is_fixed_width()) - CUDF_FAIL("Column type must be numeric or chrono or decimal32/64"); + CUDF_FAIL("Column type must be numeric or chrono or decimal32/64/128"); else if (cudf::is_fixed_point()) - CUDF_FAIL("Currently only decimal32/64 to floating point/integral is supported"); + CUDF_FAIL("Currently only decimal32/64/128 to floating point/integral is supported"); else if (cudf::is_timestamp() && is_numeric()) CUDF_FAIL("Timestamps can be created only from duration"); else @@ -364,7 +364,7 @@ struct dispatch_unary_cast_from { template std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { - CUDF_FAIL("Column type must be numeric or chrono or decimal32/64"); + CUDF_FAIL("Column type must be numeric or chrono or decimal32/64/128"); } }; } // anonymous namespace From 321761c6e5e6ded79edffc93a28754d544ff8e84 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 29 Jul 2021 15:54:17 +0000 Subject: [PATCH 030/112] Fix for TRANSFORM_TEST --- cpp/tests/transform/row_bit_count_test.cu | 58 ++++++++++------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 0081cf0d467..ccae898cd2e 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -81,16 +81,13 @@ std::pair, std::unique_ptr> build_list_column() { using LCW = cudf::test::lists_column_wrapper; constexpr size_type type_size = sizeof(device_storage_type_t) * CHAR_BIT; - - // clang-format off - cudf::test::lists_column_wrapper col{ {{1, 2}, {3, 4, 5}}, - LCW{LCW{}}, - {LCW{10}}, - {{6, 7, 8}, {9}}, - {{-1, -2}, {-3, -4}}, - {{-5, -6, -7}, {-8, -9}} }; - // clang-format on - + cudf::test::fixed_width_column_wrapper values{ + 1, 2, 3, 4, 5, 10, 6, 7, 8, 9, -1, -2, -3, -4, -5, -6, -7, -8, -9}; + cudf::test::fixed_width_column_wrapper inner_offsets{ + 0, 2, 5, 6, 9, 10, 12, 14, 17, 19}; + auto inner_list = cudf::make_lists_column(9, inner_offsets.release(), values.release(), 0, {}); + cudf::test::fixed_width_column_wrapper outer_offsets{0, 2, 2, 3, 5, 7, 9}; + auto col = cudf::make_lists_column(6, outer_offsets.release(), std::move(inner_list), 0, {}); // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf cudf::test::fixed_width_column_wrapper expected{ ((4 + 8) * CHAR_BIT) + (type_size * 5), @@ -99,8 +96,7 @@ std::pair, std::unique_ptr> build_list_column() ((4 + 8) * CHAR_BIT) + (type_size * 4), ((4 + 8) * CHAR_BIT) + (type_size * 4), ((4 + 8) * CHAR_BIT) + (type_size * 5)}; - - return {col.release(), expected.release()}; + return {std::move(col), expected.release()}; } TYPED_TEST(RowBitCountTyped, Lists) @@ -119,22 +115,21 @@ TYPED_TEST(RowBitCountTyped, Lists) TYPED_TEST(RowBitCountTyped, ListsWithNulls) { - using T = TypeParam; - using LCW = cudf::test::lists_column_wrapper; - constexpr size_type type_size = sizeof(device_storage_type_t) * CHAR_BIT; - - std::vector valids{true, false, true}; - std::vector valids2{false, true, false}; - std::vector valids3{true, false}; + using T = TypeParam; + using LCW = cudf::test::lists_column_wrapper; - // clang-format off - cudf::test::lists_column_wrapper col{ {{1, 2}, {{3, 4, 5}, valids.begin()}}, - LCW{LCW{}}, - {LCW{10}}, - {{{{6, 7, 8}, valids2.begin()}, {9}}, valids3.begin()} }; - // clang-format on - - table_view t({col}); + constexpr size_type type_size = sizeof(device_storage_type_t) * CHAR_BIT; + cudf::test::fixed_width_column_wrapper values{{1, 2, 3, 4, 5, 10, 6, 7, 8}, + {1, 1, 1, 0, 1, 1, 0, 1, 0}}; + cudf::test::fixed_width_column_wrapper inner_offsets{0, 2, 5, 6, 9, 9}; + std::vector inner_validity{1, 1, 1, 1, 0}; + auto inner_null_mask = + cudf::test::detail::make_null_mask(inner_validity.begin(), inner_validity.end()); + auto inner_list = cudf::make_lists_column( + 5, inner_offsets.release(), values.release(), 1, std::move(inner_null_mask)); + cudf::test::fixed_width_column_wrapper outer_offsets{0, 2, 2, 3, 5}; + auto col = cudf::make_lists_column(4, outer_offsets.release(), std::move(inner_list), 0, {}); + table_view t({*col}); auto result = cudf::row_bit_count(t); // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf + validity @@ -144,7 +139,6 @@ TYPED_TEST(RowBitCountTyped, ListsWithNulls) ((4 + 0) * CHAR_BIT) + (type_size * 0), ((4 + 4) * CHAR_BIT) + (type_size * 1) + 2, ((4 + 8) * CHAR_BIT) + (type_size * 3) + 5}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } @@ -430,10 +424,10 @@ TEST_F(RowBitCount, NestedTypes) l4_offsets.end()); auto const l4_size = l4_offsets.size() - 1; auto l4 = cudf::make_lists_column(static_cast(l4_size), - l4_offsets_col.release(), - innermost_struct.release(), - cudf::UNKNOWN_NULL_COUNT, - rmm::device_buffer{}); + l4_offsets_col.release(), + innermost_struct.release(), + cudf::UNKNOWN_NULL_COUNT, + rmm::device_buffer{}); // inner struct std::vector> inner_struct_children; From 02b00444b878fe1f7c3a80dd85323d105d6e6d90 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 29 Jul 2021 20:57:27 +0000 Subject: [PATCH 031/112] Rename FixedPointTestBothReps --- cpp/tests/binaryop/binop-integration-test.cpp | 76 +++++++++---------- cpp/tests/copying/concatenate_tests.cu | 20 ++--- cpp/tests/copying/scatter_tests.cpp | 6 +- cpp/tests/fixed_point/fixed_point_tests.cpp | 38 +++++----- cpp/tests/fixed_point/fixed_point_tests.cu | 6 +- cpp/tests/groupby/count_scan_tests.cpp | 6 +- cpp/tests/groupby/count_tests.cpp | 6 +- cpp/tests/groupby/max_scan_tests.cpp | 6 +- cpp/tests/groupby/max_tests.cpp | 8 +- cpp/tests/groupby/min_scan_tests.cpp | 6 +- cpp/tests/groupby/min_tests.cpp | 8 +- cpp/tests/groupby/sum_scan_tests.cpp | 6 +- cpp/tests/groupby/sum_tests.cpp | 8 +- cpp/tests/merge/merge_test.cpp | 10 +-- cpp/tests/reductions/reduction_tests.cpp | 38 +++++----- cpp/tests/replace/replace_tests.cpp | 6 +- .../reshape/interleave_columns_tests.cpp | 6 +- cpp/tests/search/search_test.cpp | 8 +- cpp/tests/sort/sort_test.cpp | 6 +- 19 files changed, 137 insertions(+), 137 deletions(-) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 68a8845132b..f31e3ebd50e 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2018,14 +2018,14 @@ TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Vector_FP64_SI32_SI64) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; template using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd) { using namespace numeric; using decimalXX = TypeParam; @@ -2034,7 +2034,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd) auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return decimalXX{i, scale_type{0}}; - }); + }); auto const vec1 = std::vector(begin, begin + sz); auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); auto expected = std::vector(sz); @@ -2058,7 +2058,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply) { using namespace numeric; using decimalXX = TypeParam; @@ -2067,7 +2067,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply) auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return decimalXX{i, scale_type{0}}; - }); + }); auto const vec1 = std::vector(begin, begin + sz); auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); auto expected = std::vector(sz); @@ -2094,7 +2094,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply) template using fp_wrapper = cudf::test::fixed_point_column_wrapper; -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply2) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply2) { using namespace numeric; using decimalXX = TypeParam; @@ -2113,7 +2113,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiply2) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv) { using namespace numeric; using decimalXX = TypeParam; @@ -2132,7 +2132,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv2) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv2) { using namespace numeric; using decimalXX = TypeParam; @@ -2151,7 +2151,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv2) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv3) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv3) { using namespace numeric; using decimalXX = TypeParam; @@ -2168,7 +2168,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv3) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv4) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv4) { using namespace numeric; using decimalXX = TypeParam; @@ -2188,7 +2188,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpDiv4) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd2) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd2) { using namespace numeric; using decimalXX = TypeParam; @@ -2207,7 +2207,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd2) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd3) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd3) { using namespace numeric; using decimalXX = TypeParam; @@ -2226,7 +2226,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd3) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd4) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd4) { using namespace numeric; using decimalXX = TypeParam; @@ -2243,7 +2243,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd4) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd5) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd5) { using namespace numeric; using decimalXX = TypeParam; @@ -2260,7 +2260,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd5) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd6) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd6) { using namespace numeric; using decimalXX = TypeParam; @@ -2279,7 +2279,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpAdd6) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointCast) +TYPED_TEST(FixedPointTestAllReps, FixedPointCast) { using namespace numeric; using decimalXX = TypeParam; @@ -2293,7 +2293,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointCast) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiplyScalar) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiplyScalar) { using namespace numeric; using decimalXX = TypeParam; @@ -2310,7 +2310,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpMultiplyScalar) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpSimplePlus) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpSimplePlus) { using namespace numeric; using decimalXX = TypeParam; @@ -2329,7 +2329,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpSimplePlus) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimple) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimple) { using namespace numeric; using decimalXX = TypeParam; @@ -2346,7 +2346,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimple) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0) { using namespace numeric; using decimalXX = TypeParam; @@ -2362,7 +2362,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0Null) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0Null) { using namespace numeric; using decimalXX = TypeParam; @@ -2378,7 +2378,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale0Null) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale2Null) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale2Null) { using namespace numeric; using decimalXX = TypeParam; @@ -2394,7 +2394,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualSimpleScale2Null) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualLessGreater) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualLessGreater) { using namespace numeric; using decimalXX = TypeParam; @@ -2438,7 +2438,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpEqualLessGreater) CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMaxSimple) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMaxSimple) { using namespace numeric; using decimalXX = TypeParam; @@ -2458,7 +2458,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMaxSimple) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMinSimple) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMinSimple) { using namespace numeric; using decimalXX = TypeParam; @@ -2478,7 +2478,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullMinSimple) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullEqualsSimple) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullEqualsSimple) { using namespace numeric; using decimalXX = TypeParam; @@ -2495,7 +2495,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpNullEqualsSimple) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div) { using namespace numeric; using decimalXX = TypeParam; @@ -2511,7 +2511,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div2) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div2) { using namespace numeric; using decimalXX = TypeParam; @@ -2527,7 +2527,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div2) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div3) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div3) { using namespace numeric; using decimalXX = TypeParam; @@ -2543,7 +2543,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div3) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div4) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div4) { using namespace numeric; using decimalXX = TypeParam; @@ -2559,7 +2559,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div4) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div6) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div6) { using namespace numeric; using decimalXX = TypeParam; @@ -2576,7 +2576,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div6) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div7) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div7) { using namespace numeric; using decimalXX = TypeParam; @@ -2593,7 +2593,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div7) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div8) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div8) { using namespace numeric; using decimalXX = TypeParam; @@ -2609,7 +2609,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div8) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div9) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div9) { using namespace numeric; using decimalXX = TypeParam; @@ -2625,7 +2625,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div9) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div10) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div10) { using namespace numeric; using decimalXX = TypeParam; @@ -2641,7 +2641,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div10) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div11) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div11) { using namespace numeric; using decimalXX = TypeParam; @@ -2657,7 +2657,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOp_Div11) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTestBothReps, FixedPointBinaryOpThrows) +TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index 7d3b7beb2cb..74f0688a38d 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -363,7 +363,7 @@ TEST_F(OverflowTest, OverflowTest) auto offsets = cudf::test::fixed_width_column_wrapper{0, size}; auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); auto col = cudf::make_strings_column( - 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); + 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), @@ -378,7 +378,7 @@ TEST_F(OverflowTest, OverflowTest) auto many_offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, size + 1); auto chars = cudf::test::fixed_width_column_wrapper{0, 1, 2}; auto col = cudf::make_strings_column( - size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{}); + size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{}); table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), @@ -486,7 +486,7 @@ TEST_F(OverflowTest, Presliced) cudf::test::fixed_width_column_wrapper offsets(offset_gen, offset_gen + num_rows + 1); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, num_rows); auto col = cudf::make_strings_column( - num_rows, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::split(*col, {(num_rows / 2) - 1}); @@ -517,7 +517,7 @@ TEST_F(OverflowTest, Presliced) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, num_rows); auto col = cudf::make_strings_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); // should pass (with 2 rows to spare) // leaving this disabled as it typically runs out of memory on a T4 @@ -686,7 +686,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto col = cudf::make_strings_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::slice(*col, {16, 32}); @@ -714,7 +714,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto col = cudf::make_lists_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::slice(*col, {16, 32}); @@ -742,7 +742,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto list_col = cudf::make_lists_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); // struct std::vector> children; @@ -1535,15 +1535,15 @@ TEST_F(ListsColumnTest, ListOfStructs) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; struct FixedPointTest : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointConcatentate) +TYPED_TEST(FixedPointTestAllReps, FixedPointConcatentate) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index be4a689f213..b0b942b57b8 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -899,14 +899,14 @@ TEST_F(BooleanMaskScatterScalarFails, NumberOfColumnAndScalarMismatch) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; template using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointScatter) +TYPED_TEST(FixedPointTestAllReps, FixedPointScatter) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index 47b2a95e7b5..a90e0f0f541 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -35,14 +35,14 @@ struct FixedPointTest : public cudf::test::BaseFixture { }; template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; using RepresentationTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTestBothReps, RepresentationTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, RepresentationTypes); -TYPED_TEST(FixedPointTestBothReps, SimpleDecimalXXConstruction) +TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXConstruction) { using decimalXX = fixed_point; @@ -63,7 +63,7 @@ TYPED_TEST(FixedPointTestBothReps, SimpleDecimalXXConstruction) EXPECT_EQ(1.234567, static_cast(num6)); } -TYPED_TEST(FixedPointTestBothReps, SimpleNegativeDecimalXXConstruction) +TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) { using decimalXX = fixed_point; @@ -84,7 +84,7 @@ TYPED_TEST(FixedPointTestBothReps, SimpleNegativeDecimalXXConstruction) EXPECT_EQ(-1.234567, static_cast(num6)); } -TYPED_TEST(FixedPointTestBothReps, PaddedDecimalXXConstruction) +TYPED_TEST(FixedPointTestAllReps, PaddedDecimalXXConstruction) { using decimalXX = fixed_point; @@ -109,7 +109,7 @@ TYPED_TEST(FixedPointTestBothReps, PaddedDecimalXXConstruction) EXPECT_EQ(0.000123, static_cast(y)); } -TYPED_TEST(FixedPointTestBothReps, SimpleBinaryFPConstruction) +TYPED_TEST(FixedPointTestAllReps, SimpleBinaryFPConstruction) { using binary_fp = fixed_point; @@ -138,7 +138,7 @@ TYPED_TEST(FixedPointTestBothReps, SimpleBinaryFPConstruction) EXPECT_EQ(1.4375, static_cast(num9)); } -TYPED_TEST(FixedPointTestBothReps, MoreSimpleBinaryFPConstruction) +TYPED_TEST(FixedPointTestAllReps, MoreSimpleBinaryFPConstruction) { using binary_fp = fixed_point; @@ -149,7 +149,7 @@ TYPED_TEST(FixedPointTestBothReps, MoreSimpleBinaryFPConstruction) EXPECT_EQ(2.0625, static_cast(num1)); } -TYPED_TEST(FixedPointTestBothReps, SimpleDecimalXXMath) +TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXMath) { using decimalXX = fixed_point; @@ -174,7 +174,7 @@ TYPED_TEST(FixedPointTestBothReps, SimpleDecimalXXMath) EXPECT_EQ(a - b, a); } -TYPED_TEST(FixedPointTestBothReps, ComparisonOperators) +TYPED_TEST(FixedPointTestAllReps, ComparisonOperators) { using decimalXX = fixed_point; @@ -193,7 +193,7 @@ TYPED_TEST(FixedPointTestBothReps, ComparisonOperators) EXPECT_TRUE(SIX / TWO >= ONE); } -TYPED_TEST(FixedPointTestBothReps, DecimalXXTrickyDivision) +TYPED_TEST(FixedPointTestAllReps, DecimalXXTrickyDivision) { using decimalXX = fixed_point; @@ -223,7 +223,7 @@ TYPED_TEST(FixedPointTestBothReps, DecimalXXTrickyDivision) EXPECT_EQ(static_cast(n), 20); } -TYPED_TEST(FixedPointTestBothReps, DecimalXXRounding) +TYPED_TEST(FixedPointTestAllReps, DecimalXXRounding) { using decimalXX = fixed_point; @@ -251,7 +251,7 @@ TYPED_TEST(FixedPointTestBothReps, DecimalXXRounding) EXPECT_TRUE(FIVE_0 * THREE_0 != TEN_1); } -TYPED_TEST(FixedPointTestBothReps, ArithmeticWithDifferentScales) +TYPED_TEST(FixedPointTestAllReps, ArithmeticWithDifferentScales) { using decimalXX = fixed_point; @@ -276,7 +276,7 @@ TYPED_TEST(FixedPointTestBothReps, ArithmeticWithDifferentScales) EXPECT_EQ(c - d, zz); } -TYPED_TEST(FixedPointTestBothReps, RescaledTest) +TYPED_TEST(FixedPointTestAllReps, RescaledTest) { using decimalXX = fixed_point; @@ -296,7 +296,7 @@ TYPED_TEST(FixedPointTestBothReps, RescaledTest) EXPECT_EQ(num5, num6.rescaled(scale_type{-5})); } -TYPED_TEST(FixedPointTestBothReps, RescaledRounding) +TYPED_TEST(FixedPointTestAllReps, RescaledRounding) { using decimalXX = fixed_point; @@ -311,7 +311,7 @@ TYPED_TEST(FixedPointTestBothReps, RescaledRounding) EXPECT_EQ(-1000, static_cast(num3.rescaled(scale_type{3}))); } -TYPED_TEST(FixedPointTestBothReps, BoolConversion) +TYPED_TEST(FixedPointTestAllReps, BoolConversion) { using decimalXX = fixed_point; @@ -468,7 +468,7 @@ struct cast_to_int32_fn { int32_t __host__ __device__ operator()(decimal32 fp) { return static_cast(fp); } }; -TYPED_TEST(FixedPointTestBothReps, FixedPointColumnWrapper) +TYPED_TEST(FixedPointTestAllReps, FixedPointColumnWrapper) { using namespace numeric; using decimalXX = fixed_point; @@ -489,7 +489,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointColumnWrapper) CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, w); } -TYPED_TEST(FixedPointTestBothReps, NoScaleOrWrongTypeID) +TYPED_TEST(FixedPointTestAllReps, NoScaleOrWrongTypeID) { auto null_mask = cudf::create_null_mask(0, cudf::mask_state::ALL_NULL); @@ -498,7 +498,7 @@ TYPED_TEST(FixedPointTestBothReps, NoScaleOrWrongTypeID) cudf::logic_error); } -TYPED_TEST(FixedPointTestBothReps, SimpleFixedPointColumnWrapper) +TYPED_TEST(FixedPointTestAllReps, SimpleFixedPointColumnWrapper) { using RepType = cudf::device_storage_type_t; @@ -554,7 +554,7 @@ TEST_F(FixedPointTest, PositiveScaleWithValuesOutsideUnderlyingType64) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); } -TYPED_TEST(FixedPointTestBothReps, ExtremelyLargeNegativeScale) +TYPED_TEST(FixedPointTestAllReps, ExtremelyLargeNegativeScale) { // This is testing fixed_point values with an extremely large negative scale. The fixed_point // implementation should be able to handle any scale representable by an int32_t diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index 2627ab6d48d..78101a3b1fe 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -40,14 +40,14 @@ struct FixedPointTest : public cudf::test::BaseFixture { }; template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; using RepresentationTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTestBothReps, RepresentationTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, RepresentationTypes); -TYPED_TEST(FixedPointTestBothReps, DecimalXXThrust) +TYPED_TEST(FixedPointTestAllReps, DecimalXXThrust) { using decimalXX = fixed_point; diff --git a/cpp/tests/groupby/count_scan_tests.cpp b/cpp/tests/groupby/count_scan_tests.cpp index 9740bfa1954..8ca73e03bbc 100644 --- a/cpp/tests/groupby/count_scan_tests.cpp +++ b/cpp/tests/groupby/count_scan_tests.cpp @@ -156,12 +156,12 @@ TEST_F(groupby_count_scan_string_test, basic) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupByCountScan) +TYPED_TEST(FixedPointTestAllReps, GroupByCountScan) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/count_tests.cpp b/cpp/tests/groupby/count_tests.cpp index 2d45de04607..2d695957326 100644 --- a/cpp/tests/groupby/count_tests.cpp +++ b/cpp/tests/groupby/count_tests.cpp @@ -169,12 +169,12 @@ TEST_F(groupby_count_string_test, basic) // clang-format on template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupByCount) +TYPED_TEST(FixedPointTestAllReps, GroupByCount) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index 70a48da69e8..962fdcde51a 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -129,12 +129,12 @@ TYPED_TEST(groupby_max_scan_test, null_keys_and_values) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxScanDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxScanDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index b5710d3f4bc..eb000cb73df 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -224,12 +224,12 @@ TEST_F(groupby_dictionary_max_test, basic) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -253,7 +253,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxDecimalAsValue) } } -TYPED_TEST(FixedPointTestBothReps, GroupByHashMaxDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/min_scan_tests.cpp b/cpp/tests/groupby/min_scan_tests.cpp index ef548407761..e4f2091781e 100644 --- a/cpp/tests/groupby/min_scan_tests.cpp +++ b/cpp/tests/groupby/min_scan_tests.cpp @@ -143,12 +143,12 @@ TEST_F(groupby_min_scan_string_test, basic) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortMinScanDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMinScanDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 1544e867595..161c69714ae 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -224,12 +224,12 @@ TEST_F(groupby_dictionary_min_test, basic) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortMinDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -252,7 +252,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMinDecimalAsValue) } } -TYPED_TEST(FixedPointTestBothReps, GroupByHashMinDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index 2f1928747ae..af8e8ff2eb4 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -133,12 +133,12 @@ TYPED_TEST(groupby_sum_scan_test, null_keys_and_values) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortSumScanDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index 458937ff2e4..9f4aaa1336f 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -152,12 +152,12 @@ TYPED_TEST(groupby_sum_test, dictionary) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, GroupBySortSumDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -187,7 +187,7 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortSumDecimalAsValue) } } -TYPED_TEST(FixedPointTestBothReps, GroupByHashSumDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashSumDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 452f3adfdbb..de6eefb989a 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -453,7 +453,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) } else { return row * 2; } - }); + }); auto valid_sequence1 = cudf::detail::make_counting_transform_iterator( 0, [inputRows](auto row) { return (row < inputRows - 1); }); cudf::test::fixed_width_column_wrapper @@ -698,7 +698,7 @@ TEST_F(MergeTest, KeysWithNulls) cudf::size_type nrows = 13200; // Ensures that thrust::merge uses more than one tile/block auto data_iter = thrust::make_counting_iterator(0); auto valids1 = cudf::detail::make_counting_transform_iterator( - 0, [](auto row) { return (row % 10 == 0) ? false : true; }); + 0, [](auto row) { return (row % 10 == 0) ? false : true; }); cudf::test::fixed_width_column_wrapper data1(data_iter, data_iter + nrows, valids1); auto valids2 = cudf::detail::make_counting_transform_iterator( 0, [](auto row) { return (row % 15 == 0) ? false : true; }); @@ -874,15 +874,15 @@ TEST_F(MergeTest, StructsNestedWithNulls) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; template using fp_wrapper = cudf::test::fixed_point_column_wrapper; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointMerge) +TYPED_TEST(FixedPointTestAllReps, FixedPointMerge) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 37dbb913781..9a919c63d28 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1041,12 +1041,12 @@ TYPED_TEST(ReductionTest, UniqueCount) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductZeroScale) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductZeroScale) { using namespace numeric; using decimalXX = TypeParam; @@ -1070,7 +1070,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductZeroScale) EXPECT_EQ(result_fp, _24); } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProduct) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProduct) { using namespace numeric; using decimalXX = TypeParam; @@ -1090,7 +1090,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProduct) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductWithNulls) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductWithNulls) { using namespace numeric; using decimalXX = TypeParam; @@ -1110,7 +1110,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductWithNulls) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSum) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSum) { using namespace numeric; using decimalXX = TypeParam; @@ -1131,7 +1131,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSum) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumAlternate) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumAlternate) { using namespace numeric; using decimalXX = TypeParam; @@ -1155,7 +1155,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumAlternate) EXPECT_EQ(result_scalar->fixed_point_value(), TEN); } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumFractional) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumFractional) { using namespace numeric; using decimalXX = TypeParam; @@ -1175,7 +1175,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumFractional) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumLarge) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumLarge) { using namespace numeric; using decimalXX = TypeParam; @@ -1198,7 +1198,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumLarge) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMin) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMin) { using namespace numeric; using decimalXX = TypeParam; @@ -1218,7 +1218,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMin) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMinLarge) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMinLarge) { using namespace numeric; using decimalXX = TypeParam; @@ -1239,7 +1239,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMinLarge) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMax) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMax) { using namespace numeric; using decimalXX = TypeParam; @@ -1259,7 +1259,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMax) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMaxLarge) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMaxLarge) { using namespace numeric; using decimalXX = TypeParam; @@ -1280,7 +1280,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMaxLarge) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNUnique) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionNUnique) { using namespace numeric; using decimalXX = TypeParam; @@ -1299,7 +1299,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNUnique) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumOfSquares) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumOfSquares) { using namespace numeric; using decimalXX = TypeParam; @@ -1319,7 +1319,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumOfSquares) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianOddNumberOfElements) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMedianOddNumberOfElements) { using namespace numeric; using decimalXX = TypeParam; @@ -1339,7 +1339,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianOddNumberOfElements) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianEvenNumberOfElements) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMedianEvenNumberOfElements) { using namespace numeric; using decimalXX = TypeParam; @@ -1359,7 +1359,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianEvenNumberOfElements } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionQuantile) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionQuantile) { using namespace numeric; using decimalXX = TypeParam; @@ -1381,7 +1381,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionQuantile) } } -TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNthElement) +TYPED_TEST(FixedPointTestAllReps, FixedPointReductionNthElement) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 58ef08f6052..70da4aaf4d4 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -539,14 +539,14 @@ TYPED_TEST(ReplaceTest, LargeScaleReplaceTest) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; template using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointReplace) +TYPED_TEST(FixedPointTestAllReps, FixedPointReplace) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 386fd9d08ee..d1e97bb3e84 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -345,12 +345,12 @@ TEST_F(InterleaveStringsColumnsTest, MultiColumnStringMixNullableMix) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointInterleave) +TYPED_TEST(FixedPointTestAllReps, FixedPointInterleave) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index 38fc5abb250..df340c772ed 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -1817,12 +1817,12 @@ TEST_F(SearchTest, multi_contains_empty_input_set_string) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointLowerBound) +TYPED_TEST(FixedPointTestAllReps, FixedPointLowerBound) { using namespace numeric; using decimalXX = TypeParam; @@ -1846,7 +1846,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointLowerBound) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } -TYPED_TEST(FixedPointTestBothReps, FixedPointUpperBound) +TYPED_TEST(FixedPointTestAllReps, FixedPointUpperBound) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 6e668068f94..48cab98cb3e 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -673,14 +673,14 @@ TEST_F(SortByKey, ValueKeysSizeMismatch) } template -struct FixedPointTestBothReps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; template using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestBothReps, FixedPointSortedOrderGather) +TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather) { using namespace numeric; using decimalXX = TypeParam; From 95a107c717a295a3d15bbf997c077f72c5d78678 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 3 Aug 2021 20:47:16 +0000 Subject: [PATCH 032/112] test group_by for only decimal32/64 --- cpp/tests/groupby/max_tests.cpp | 9 +++++---- cpp/tests/groupby/min_tests.cpp | 9 +++++---- cpp/tests/groupby/sum_scan_tests.cpp | 7 ++++--- cpp/tests/groupby/sum_tests.cpp | 9 +++++---- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index eb000cb73df..cfc15a8fe56 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -224,12 +224,13 @@ TEST_F(groupby_dictionary_max_test, basic) } template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); -TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -253,7 +254,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxDecimalAsValue) } } -TYPED_TEST(FixedPointTestAllReps, GroupByHashMaxDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 161c69714ae..e297f21afe8 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -224,12 +224,13 @@ TEST_F(groupby_dictionary_min_test, basic) } template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); -TYPED_TEST(FixedPointTestAllReps, GroupBySortMinDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -252,7 +253,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortMinDecimalAsValue) } } -TYPED_TEST(FixedPointTestAllReps, GroupByHashMinDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index af8e8ff2eb4..85a038af678 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -133,12 +133,13 @@ TYPED_TEST(groupby_sum_scan_test, null_keys_and_values) } template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); -TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumScanDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index 9f4aaa1336f..27c63c3baef 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -152,12 +152,13 @@ TYPED_TEST(groupby_sum_test, dictionary) } template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); -TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -187,7 +188,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) } } -TYPED_TEST(FixedPointTestAllReps, GroupByHashSumDecimalAsValue) +TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashSumDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; From 0d8aa3640e9401a3207b4d5ac55a2649c84c48be Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 4 Aug 2021 02:34:34 +0000 Subject: [PATCH 033/112] Using cuda::std:: for utility functions --- .../detail/utilities/device_operators.cuh | 4 +- .../cudf/detail/utilities/integer_utils.hpp | 2 +- cpp/include/cudf/fixed_point/fixed_point.hpp | 4 +- cpp/include/cudf/fixed_point/temporary.hpp | 53 +------------------ cpp/include/cudf/utilities/traits.hpp | 2 +- cpp/src/reductions/scan/scan_exclusive.cu | 4 +- cpp/src/reductions/scan/scan_inclusive.cu | 4 +- cpp/src/round/round.cu | 12 ++--- cpp/src/strings/convert/utilities.cuh | 4 +- cpp/tests/reductions/reduction_tests.cpp | 2 +- 10 files changed, 21 insertions(+), 70 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 535f8d52ab4..f8792061612 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -99,7 +99,7 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - return numeric::detail::numeric_limits::max(); + return cuda::std::numeric_limits::max(); } template ()>* = nullptr> @@ -137,7 +137,7 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - return numeric::detail::numeric_limits::lowest(); + return cuda::std::numeric_limits::lowest(); } template ()>* = nullptr> diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index 365ee1e91f4..ddedab3944c 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -155,7 +155,7 @@ constexpr inline bool is_a_power_of_two(I val) noexcept template constexpr inline auto absolute_value(T value) -> T { - if constexpr (numeric::detail::is_signed()) return numeric::detail::abs(value); + if constexpr (cuda::std::is_signed()) return numeric::detail::abs(value); return value; } diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index f4254ffe4ba..930fef40747 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -57,7 +57,7 @@ constexpr inline auto is_supported_representation_type() template constexpr inline auto is_supported_construction_value_type() { - return numeric::detail::is_integral() || cuda::std::is_floating_point::value; + return cuda::std::is_integral() || cuda::std::is_floating_point::value; } // Helper functions for `fixed_point` type @@ -279,7 +279,7 @@ class fixed_point { * @return The `fixed_point` number in base 10 (aka human readable format) */ template ()>* = nullptr> + typename cuda::std::enable_if_t::value>* = nullptr> explicit constexpr operator U() const { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 12b10fee91d..49c83090da7 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -20,45 +20,14 @@ // Note: The versions are used in order for Jitify to work with our fixed_point type. // Jitify is needed for several algorithms (binaryop, rolling, etc) +#include #include #include -#include #include namespace numeric { namespace detail { -namespace numeric_limits { - -template -static constexpr auto max() -> T -{ - if constexpr (std::is_same_v) { - // 170,141,183,460,469,231,731,687,303,715,884,105,727 - __int128_t max = 1; - for (int i = 0; i < 126; ++i) - max *= 2; - return max + (max - 1); - } - - return std::numeric_limits::max(); -} - -template -static constexpr auto lowest() -> T -{ - if constexpr (std::is_same_v) { - // -170,141,183,460,469,231,731,687,303,715,884,105,728 - __int128_t lowest = -1; - for (int i = 0; i < 127; ++i) - lowest *= 2; - return lowest; - } - - return std::numeric_limits::lowest(); -} - -} // namespace numeric_limits template auto to_string(T value) -> std::string @@ -69,7 +38,7 @@ auto to_string(T value) -> std::string if (sign) { value += 1; // avoid overflowing if value == _int128_t lowest value *= -1; - if (value == detail::numeric_limits::max<__int128_t>()) + if (value == cuda::std::numeric_limits<__int128_t>::max()) return "-170141183460469231731687303715884105728"; value += 1; // can add back the one, no need to avoid overflow anymore } @@ -104,23 +73,5 @@ CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) return lhs > rhs ? lhs : rhs; } -template -constexpr auto is_signed() -{ - return std::is_signed::value || std::is_same_v; -} - -template -constexpr auto is_integral() -{ - return cuda::std::is_integral::value || cuda::std::is_same_v; -} - -template -constexpr auto is_arithmetic() -{ - return numeric::detail::is_integral() || cuda::std::is_floating_point_v; -} - } // namespace detail } // namespace numeric diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index dbb06865f20..388a2e8aace 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -152,7 +152,7 @@ constexpr inline bool is_equality_comparable() template constexpr inline bool is_numeric() { - return numeric::detail::is_integral() or std::is_floating_point::value; + return cuda::std::is_integral() or std::is_floating_point::value; } struct is_numeric_impl { diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 5c3810743a9..200ba5a7a15 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -50,7 +50,7 @@ struct scan_dispatcher { * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column with scan results */ - template ()>* = nullptr> + template ::value>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, @@ -72,7 +72,7 @@ struct scan_dispatcher { } template - std::enable_if_t(), std::unique_ptr> operator()( + std::enable_if_t::value, std::unique_ptr> operator()( Args&&...) { CUDF_FAIL("Non-arithmetic types not supported for exclusive scan"); diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 5ba500b10a7..ef804f244e9 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -122,11 +122,11 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return numeric::detail::is_arithmetic() || is_string_supported(); + return cuda::std::is_arithmetic() || is_string_supported(); } // for arithmetic types - template ()>* = nullptr> + template ::value>* = nullptr> auto inclusive_scan(column_view const& input_view, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index ab2acc91c9d..3a6a2beda45 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -86,7 +86,7 @@ struct half_up_zero { return generic_round(e); } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -105,7 +105,7 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -122,7 +122,7 @@ struct half_up_negative { return generic_round(e / n) * n; } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -139,7 +139,7 @@ struct half_even_zero { return generic_round_half_even(e); } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -158,7 +158,7 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -175,7 +175,7 @@ struct half_even_negative { return generic_round_half_even(e / n) * n; } - template ()>* = nullptr> + template ::value>* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case diff --git a/cpp/src/strings/convert/utilities.cuh b/cpp/src/strings/convert/utilities.cuh index 6a6c92ba7c7..0006592e599 100644 --- a/cpp/src/strings/convert/utilities.cuh +++ b/cpp/src/strings/convert/utilities.cuh @@ -64,7 +64,7 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer) *d_buffer = '0'; return 1; } - bool const is_negative = numeric::detail::is_signed() ? (value < 0) : false; + bool const is_negative = cuda::std::is_signed() ? (value < 0) : false; constexpr IntegerType base = 10; constexpr int MAX_DIGITS = 20; // largest 64-bit integer is 20 digits @@ -98,7 +98,7 @@ constexpr size_type count_digits(IntegerType value) { // TODO definitely broken if (value == 0) return 1; - bool is_negative = numeric::detail::is_signed() ? (value < 0) : false; + bool is_negative = cuda::std::is_signed() ? (value < 0) : false; // abs(std::numeric_limits::min()) is negative; // for all integer types, the max() and min() values have the same number of digits value = (value == std::numeric_limits::min()) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 9a919c63d28..a2194bc1b55 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1454,7 +1454,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction3) auto const values = std::vector(127, -2); auto const scale = scale_type{0}; auto const column = fp_wrapper{values.cbegin(), values.cend(), scale}; - auto const lowest = numeric::detail::numeric_limits::lowest(); + auto const lowest = cuda::std::numeric_limits::lowest(); auto const expected = decimal128{scaled_integer{lowest, scale}}; auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale}; From 73b36825ce9cac4d7309010cbc007d571e77a325 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 4 Aug 2021 16:19:11 +0000 Subject: [PATCH 034/112] cudf::fill(_in_place) fix for decimal128 --- cpp/src/filling/fill.cu | 14 ++++++----- cpp/tests/filling/fill_tests.cpp | 43 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index b62d2ed4f8f..ab1bca86444 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -77,8 +77,7 @@ struct in_place_fill_range_dispatch { auto unscaled = static_cast const&>(value).value(); using RepType = typename T::rep; auto s = cudf::numeric_scalar(unscaled, value.is_valid()); - auto view = cudf::bit_cast(destination, s.type()); - in_place_fill(view, begin, end, s, stream); + in_place_fill(destination, begin, end, s, stream); } template @@ -93,13 +92,15 @@ struct out_of_place_fill_range_dispatch { cudf::column_view const& input; template - std::enable_if_t(), std::unique_ptr> + std::enable_if_t() and not cudf::is_fixed_point(), + std::unique_ptr> operator()(Args...) { CUDF_FAIL("Unsupported type in fill."); } - template ())> + template () or cudf::is_fixed_point())> std::unique_ptr operator()( cudf::size_type begin, cudf::size_type end, @@ -116,8 +117,9 @@ struct out_of_place_fill_range_dispatch { 0); } - auto ret_view = p_ret->mutable_view(); - in_place_fill(ret_view, begin, end, value, stream); + auto ret_view = p_ret->mutable_view(); + using DeviceType = cudf::device_storage_type_t; + in_place_fill(ret_view, begin, end, value, stream); } return p_ret; diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index 75c0cad20e7..3173a23d493 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -363,4 +363,47 @@ TEST_F(FillErrorTestFixture, DTypeMismatch) EXPECT_THROW(auto p_ret = cudf::fill(destination, 0, 10, *p_val), cudf::logic_error); } +template +class FixedPointAllReps : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(FixedPointAllReps, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointAllReps, OutOfPlaceFill) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + for (auto const i : {0, -1, -2, -3, -4}) { + auto const scale = scale_type{i}; + auto const column = fp_wrapper{{4104, 42, 1729, 55}, scale}; + auto const expected = fp_wrapper{{42, 42, 42, 42}, scale}; + auto const scalar = cudf::make_fixed_point_scalar(42, scale); + + auto const result = cudf::fill(column, 0, 4, *scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); + } +} + +TYPED_TEST(FixedPointAllReps, InPlaceFill) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + for (auto const i : {0, -1, -2, -3, -4}) { + auto const scale = scale_type{i}; + auto column = fp_wrapper{{4104, 42, 1729, 55}, scale}; + auto const expected = fp_wrapper{{42, 42, 42, 42}, scale}; + auto const scalar = cudf::make_fixed_point_scalar(42, scale); + + auto mut_column = cudf::mutable_column_view{column}; + cudf::fill_in_place(mut_column, 0, 4, *scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(column, expected); + } +} + CUDF_TEST_PROGRAM_MAIN() From bcd18361208d206c4bcde30f6000962b2a80a8bb Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 5 Aug 2021 05:35:17 +0000 Subject: [PATCH 035/112] Remove TODOs --- cpp/include/cudf/fixed_point/fixed_point.hpp | 1 - cpp/include/cudf/utilities/type_dispatcher.hpp | 7 ++++++- cpp/src/strings/convert/convert_fixed_point.cu | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 930fef40747..4891bc4ba2d 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -53,7 +53,6 @@ constexpr inline auto is_supported_representation_type() cuda::std::is_same_v; } -// TODO make a temporary::is_integral function template constexpr inline auto is_supported_construction_value_type() { diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index e0e7254588d..40e03dc62c3 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -121,7 +121,12 @@ constexpr bool type_id_matches_device_storage_type(type_id id) (id == type_id::DECIMAL128 && std::is_same_v) || id == type_to_id(); } -// TODO docs +/** + * @brief Checks if `id` is fixed_point (DECIMAL32/64/128) + * + * @return `true` if `id` is `DECIMAL32`, `DECIMAL64` or `DECIMAL128` + * @return `false` otherwise + */ constexpr bool is_fixed_point(cudf::type_id id) { return id == type_id::DECIMAL32 or // diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 9d0a6a3fdd2..20fbcc5f1b2 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -115,7 +115,7 @@ struct string_to_decimal_check_fn { return (exp_ten < scale) ? true : value <= static_cast( - std::numeric_limits::max() / // TODO probably broken + cuda::std::numeric_limits::max() / static_cast(exp10(static_cast(exp_ten - scale)))); } }; From 84f394bb9d2036962c9c6e93569b12fc10bcf0b0 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 23 Aug 2021 16:50:13 +0000 Subject: [PATCH 036/112] Initial string conversion changes --- .../strings/detail/convert/fixed_point.cuh | 12 ++--- .../strings/convert/convert_fixed_point.cu | 17 ++++--- cpp/tests/strings/fixed_point_tests.cpp | 49 +++++++++++-------- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 53774ed948d..f437bebcda4 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -24,22 +24,22 @@ namespace detail { /** * @brief Return the integer component of a decimal string. * - * This is reads everything up to the exponent 'e' notation. + * This reads everything up to the exponent 'e' notation. * The return includes the integer digits and any exponent offset. * * @param[in,out] iter Start of characters to parse * @param[in] end End of characters to parse * @return Integer component and exponent offset. */ -__device__ inline thrust::pair parse_integer(char const*& iter, - char const* iter_end, - const char decimal_pt_char = '.') +__device__ inline thrust::pair<__uint128_t, int32_t> parse_integer(char const*& iter, + char const* iter_end, + const char decimal_pt_char = '.') { // highest value where another decimal digit cannot be appended without an overflow; // this preserves the most digits when scaling the final result - constexpr uint64_t decimal_max = (std::numeric_limits::max() - 9L) / 10L; + constexpr auto decimal_max = (cuda::std::numeric_limits<__uint128_t>::max() - 9L) / 10L; - uint64_t value = 0; // for checking overflow + __uint128_t value = 0; // for checking overflow int32_t exp_offset = 0; bool decimal_found = false; diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 20fbcc5f1b2..fb168f5dcd3 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -83,7 +83,7 @@ struct string_to_decimal_check_fn { int32_t const scale; string_to_decimal_check_fn(column_device_view const& d_strings, int32_t scale) - : d_strings(d_strings), scale(scale) + : d_strings{d_strings}, scale{scale} { } @@ -112,11 +112,16 @@ struct string_to_decimal_check_fn { exp_ten += exp_offset; // finally, check for overflow based on the exp_ten and scale values - return (exp_ten < scale) - ? true - : value <= static_cast( - cuda::std::numeric_limits::max() / - static_cast(exp10(static_cast(exp_ten - scale)))); + if (exp_ten < scale) { + // temporary bug fix + // TODO: fix once David's refactor/comprehensive bug fix is done + return (value / static_cast<__uint128_t>(exp10(static_cast(scale - exp_ten)))) <= + static_cast<__uint128_t>(cuda::std::numeric_limits::max()); + } else { + return value <= static_cast<__uint128_t>( + cuda::std::numeric_limits::max() / + static_cast(exp10(static_cast(exp_ten - scale)))); + } } }; diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index d8b570cee8b..674ad0d5bc6 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -182,38 +182,45 @@ TEST_F(StringsConvertTest, IsFixedPoint) cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{1}}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); - cudf::test::strings_column_wrapper big_numbers({ - "2147483647", - "-2147483647", - "2147483648", - "9223372036854775807", - "-9223372036854775807", - "9223372036854775808", - "100E2147483648", - }); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL32}); - auto const expected32 = - cudf::test::fixed_width_column_wrapper({true, true, false, false, false, false, false}); + cudf::test::strings_column_wrapper big_numbers({"2147483647", + "-2147483647", + "2147483648", + "9223372036854775807", + "-9223372036854775807", + "9223372036854775808", + "100E2147483648", + "170141183460469231731687303715884105727", + "170141183460469231731687303715884105728"}); + + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL32}); + auto const expected32 = cudf::test::fixed_width_column_wrapper( + {true, true, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL64}); - auto const expected64 = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, false, false}); + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL64}); + auto const expected64 = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64); + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL128}); + auto const expected128 = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, true, false, true, false}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected128); + results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{10}}); - auto const expected32_scaled = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, false}); + auto const expected32_scaled = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, true, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32_scaled); results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-5}}); - auto const expected64_scaled = - cudf::test::fixed_width_column_wrapper({true, true, true, false, false, false, false}); + auto const expected64_scaled = cudf::test::fixed_width_column_wrapper( + {true, true, true, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled); } From 7031551cde71a6069a819dab374556e6d54b134f Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 24 Aug 2021 18:43:35 +0000 Subject: [PATCH 037/112] Final string changes --- cpp/include/cudf/strings/detail/convert/fixed_point.cuh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index f437bebcda4..5f6ceb41588 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -142,11 +142,9 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int exp_ten += exp_offset; // shift the output value based on the exp_ten and the scale values - if (exp_ten < scale) { - value = value / static_cast(exp10(static_cast(scale - exp_ten))); - } else { - value = value * static_cast(exp10(static_cast(exp_ten - scale))); - } + value = exp_ten < scale + ? value / static_cast<__uint128_t>(exp10(static_cast(scale - exp_ten))) + : value * static_cast<__uint128_t>(exp10(static_cast(exp_ten - scale))); return static_cast(value) * (sign == 0 ? 1 : sign); } From ea97b9d00d3b2733b23e949193d9893ab76d1299 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 25 Aug 2021 20:49:52 +0000 Subject: [PATCH 038/112] Enhance casting tests for decimal128 --- cpp/tests/unary/cast_tests.cpp | 204 +++++++++++++++++++++++++++------ 1 file changed, 167 insertions(+), 37 deletions(-) diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index c71f6aa2019..43dca211ded 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -784,67 +784,197 @@ TYPED_TEST(FixedPointTests, FixedPointToFixedPointSameTypeidDownPositive) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTests, FixedPointToFixedPointDifferentTypeid) +TYPED_TEST(FixedPointTests, Decimal32ToDecimalXX) { using namespace numeric; - using decimalA = TypeParam; - using RepTypeA = cudf::device_storage_type_t; - using RepTypeB = std::conditional_t, int64_t, int32_t>; - using fp_wrapperA = cudf::test::fixed_point_column_wrapper; - using fp_wrapperB = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepTypeFrom = int32_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal64ToDecimalXX) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = int64_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal128ToDecimalXX) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = __int128_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal32ToDecimalXXWithSmallerScale) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = int32_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{172900, 1729000, 17290000, 172900000}, scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal64ToDecimalXXWithSmallerScale) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = int64_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; - auto const input = fp_wrapperB{{1729, 17290, 172900, 1729000}, scale_type{-3}}; - auto const expected = fp_wrapperA{{1729, 17290, 172900, 1729000}, scale_type{-3}}; - auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{172900, 1729000, 17290000, 172900000}, scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTests, FixedPointToFixedPointDifferentTypeidDown) +TYPED_TEST(FixedPointTests, Decimal128ToDecimalXXWithSmallerScale) { using namespace numeric; - using decimalA = TypeParam; - using RepTypeA = cudf::device_storage_type_t; - using RepTypeB = std::conditional_t, int64_t, int32_t>; - using fp_wrapperA = cudf::test::fixed_point_column_wrapper; - using fp_wrapperB = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepTypeFrom = __int128_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; - auto const input = fp_wrapperB{{1729, 17290, 172900, 1729000}, scale_type{-3}}; - auto const expected = fp_wrapperA{{172900, 1729000, 17290000, 172900000}, scale_type{-5}}; - auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{172900, 1729000, 17290000, 172900000}, scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTests, FixedPointToFixedPointDifferentTypeidUp) +TYPED_TEST(FixedPointTests, Decimal32ToDecimalXXWithLargerScale) { using namespace numeric; - using decimalA = TypeParam; - using RepTypeA = cudf::device_storage_type_t; - using RepTypeB = std::conditional_t, int64_t, int32_t>; - using fp_wrapperA = cudf::test::fixed_point_column_wrapper; - using fp_wrapperB = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepTypeFrom = int32_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; - auto const input = fp_wrapperB{{1729, 17290, 172900, 1729000}, scale_type{-3}}; - auto const expected = fp_wrapperA{{1, 17, 172, 1729}, scale_type{0}}; - auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } -TYPED_TEST(FixedPointTests, FixedPointToFixedPointDifferentTypeidUpNullMask) +TYPED_TEST(FixedPointTests, Decimal64ToDecimalXXWithLargerScale) { using namespace numeric; - using decimalA = TypeParam; - using RepTypeA = cudf::device_storage_type_t; - using RepTypeB = std::conditional_t, int64_t, int32_t>; - using fp_wrapperA = cudf::test::fixed_point_column_wrapper; - using fp_wrapperB = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepTypeFrom = int64_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; - auto const vec = std::vector{1729, 17290, 172900, 1729000}; - auto const input = fp_wrapperB{vec.cbegin(), vec.cend(), {1, 1, 1, 0}, scale_type{-3}}; - auto const expected = fp_wrapperA{{1, 17, 172, 1729000}, {1, 1, 1, 0}, scale_type{0}}; - auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } + +TYPED_TEST(FixedPointTests, Decimal128ToDecimalXXWithLargerScale) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = __int128_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapperFrom{{1729, 17290, 172900, 1729000}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal32ToDecimalXXWithLargerScaleAndNullMask) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = int32_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const vec = std::vector{1729, 17290, 172900, 1729000}; + auto const input = fp_wrapperFrom{vec.cbegin(), vec.cend(), {1, 1, 1, 0}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729000}, {1, 1, 1, 0}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal64ToDecimalXXWithLargerScaleAndNullMask) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = int64_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const vec = std::vector{1729, 17290, 172900, 1729000}; + auto const input = fp_wrapperFrom{vec.cbegin(), vec.cend(), {1, 1, 1, 0}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729000}, {1, 1, 1, 0}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointTests, Decimal128ToDecimalXXWithLargerScaleAndNullMask) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepTypeFrom = __int128_t; + using RepTypeTo = cudf::device_storage_type_t; + using fp_wrapperFrom = cudf::test::fixed_point_column_wrapper; + using fp_wrapperTo = cudf::test::fixed_point_column_wrapper; + + auto const vec = std::vector{1729, 17290, 172900, 1729000}; + auto const input = fp_wrapperFrom{vec.cbegin(), vec.cend(), {1, 1, 1, 0}, scale_type{-3}}; + auto const expected = fp_wrapperTo{{1, 17, 172, 1729000}, {1, 1, 1, 0}, scale_type{0}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(0)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} \ No newline at end of file From 655cceedc8762fcbc309a8ac08715bba53b583a2 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 26 Aug 2021 02:27:56 +0000 Subject: [PATCH 039/112] Merge conflict fixes --- cpp/include/cudf/strings/detail/convert/fixed_point.cuh | 4 ++-- cpp/src/strings/convert/convert_fixed_point.cu | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 325bb4d0967..aa3f544202f 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include namespace cudf { namespace strings { @@ -137,7 +137,7 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int // if string begins with a sign, continue with next character if (sign != 0) ++iter; - using UnsignedDecimalType = std::make_unsigned_t; + using UnsignedDecimalType = cuda::std::make_unsigned_t; auto [value, exp_offset] = parse_integer(iter, iter_end); if (value == 0) { return DecimalType{0}; } diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index bbaae08c6b8..23e027be208 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -37,6 +37,8 @@ #include #include +#include + namespace cudf { namespace strings { namespace detail { @@ -97,7 +99,7 @@ struct string_to_decimal_check_fn { auto const iter_end = d_str.data() + d_str.size_bytes(); - using UnsignedDecimalType = std::make_unsigned_t; + using UnsignedDecimalType = cuda::std::make_unsigned_t; auto [value, exp_offset] = parse_integer(iter, iter_end); // only exponent notation is expected here From 2a894bd497906f67c76a31226571ab847edac46a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 26 Aug 2021 20:55:25 +0000 Subject: [PATCH 040/112] Missed STRINGS fixes --- cpp/src/strings/convert/convert_fixed_point.cu | 3 ++- cpp/tests/strings/fixed_point_tests.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 23e027be208..524f6c614e8 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -37,6 +37,7 @@ #include #include +#include #include namespace cudf { @@ -117,7 +118,7 @@ struct string_to_decimal_check_fn { // finally, check for overflow based on the exp_ten and scale values return (exp_ten < scale) or value <= static_cast( - std::numeric_limits::max() / + cuda::std::numeric_limits::max() / static_cast(exp10(static_cast(exp_ten - scale)))); } }; diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 511b4756ed7..8fffa8ce157 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -206,10 +206,10 @@ TEST_F(StringsConvertTest, IsFixedPoint) {true, true, true, true, true, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL128}); - auto const expected128 = cudf::test::fixed_width_column_wrapper( - {true, true, true, true, true, true, false, true, false}); + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL128}); + auto const expected128 = + cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, true, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected128); results = cudf::strings::is_fixed_point( From d8813211cff33184e0c5293dc8c4fc810ade4064 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 26 Aug 2021 22:24:08 +0000 Subject: [PATCH 041/112] Enhance STRINGS_TEST --- cpp/tests/strings/fixed_point_tests.cpp | 42 ++++++++++++------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 8fffa8ce157..2b6883a080d 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -182,54 +182,52 @@ TEST_F(StringsConvertTest, IsFixedPoint) cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{1}}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); - cudf::test::strings_column_wrapper big_numbers({ - "2147483647", - "-2147483647", - "2147483648", - "9223372036854775807", - "-9223372036854775807", - "9223372036854775808", - "9223372036854775808000", - "100E2147483648", - // "170141183460469231731687303715884105727", - // "170141183460469231731687303715884105728" TODO add these back - }); + cudf::test::strings_column_wrapper big_numbers({"2147483647", + "-2147483647", + "2147483648", + "9223372036854775807", + "-9223372036854775807", + "9223372036854775808", + "9223372036854775808000", + "100E2147483648", + "170141183460469231731687303715884105727", + "170141183460469231731687303715884105728"}); results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL32}); auto const expected32 = cudf::test::fixed_width_column_wrapper( - {true, true, false, false, false, false, false, false}); + {true, true, false, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32); results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64}); auto const expected64 = cudf::test::fixed_width_column_wrapper( - {true, true, true, true, true, false, false, false}); + {true, true, true, true, true, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL128}); - auto const expected128 = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, true, false}); + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL128}); + auto const expected128 = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, true, true, false, true, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected128); results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{10}}); auto const expected32_scaled = cudf::test::fixed_width_column_wrapper( - {true, true, true, true, true, true, false, false}); + {true, true, true, true, true, true, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32_scaled); results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{10}}); - auto const expected64_scaled_positive = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, true, false}); + auto const expected64_scaled_positive = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, true, true, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled_positive); results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-5}}); auto const expected64_scaled = cudf::test::fixed_width_column_wrapper( - {true, true, true, false, false, false, false, false}); + {true, true, true, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled); } From 1380a0cebc75c08bf693cc986309b7fb7add7843 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 26 Aug 2021 22:46:05 +0000 Subject: [PATCH 042/112] Enhance ROUND tests --- cpp/tests/round/round_tests.cpp | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index 825703274e2..b4050625570 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -587,6 +587,39 @@ TEST_F(RoundTests, Int64AtBoundaryHalfUp) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected5, result5->view()); } +TEST_F(RoundTests, FixedPointAtBoundaryTestHalfUp) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const m = std::numeric_limits::max(); // 170141183460469231731687303715884105727 + + { + auto const input = fp_wrapper{{m}, scale_type{0}}; + auto const expected = fp_wrapper{{m / 100000}, scale_type{5}}; + auto const result = cudf::round(input, -5, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const input = fp_wrapper{{m}, scale_type{0}}; + auto const expected = fp_wrapper{{m / 100000000000}, scale_type{11}}; + auto const result = cudf::round(input, -11, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const input = fp_wrapper{{m}, scale_type{0}}; + auto const expected = fp_wrapper{{m / 1000000000000000}, scale_type{15}}; + auto const result = cudf::round(input, -15, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + TEST_F(RoundTests, BoolTestHalfUp) { using fw_wrapper = cudf::test::fixed_width_column_wrapper; From b5d449331207c43c3f64b123c44389f6279ef9c7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 26 Aug 2021 22:57:39 +0000 Subject: [PATCH 043/112] Fix FIXED_POINT_TESTs --- cpp/tests/fixed_point/fixed_point_tests.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index 30b3284e032..a90e0f0f541 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -524,8 +524,8 @@ TEST_F(FixedPointTest, PositiveScaleWithValuesOutsideUnderlyingType32) auto const expected2 = fp_wrapper{{50000000}, scale_type{6}}; auto const type = cudf::data_type{cudf::type_id::DECIMAL32, 6}; - auto const result1 = cudf::jit::binary_operation(a, b, cudf::binary_operator::ADD, type); - auto const result2 = cudf::jit::binary_operation(a, c, cudf::binary_operator::DIV, type); + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type); + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); @@ -547,8 +547,8 @@ TEST_F(FixedPointTest, PositiveScaleWithValuesOutsideUnderlyingType64) auto const expected2 = fp_wrapper{{50000000}, scale_type{100}}; auto const type = cudf::data_type{cudf::type_id::DECIMAL64, 100}; - auto const result1 = cudf::jit::binary_operation(a, b, cudf::binary_operator::ADD, type); - auto const result2 = cudf::jit::binary_operation(a, c, cudf::binary_operator::DIV, type); + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type); + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); @@ -570,10 +570,10 @@ TYPED_TEST(FixedPointTestAllReps, ExtremelyLargeNegativeScale) auto const expected2 = fp_wrapper{{5}, scale_type{-201}}; auto const type1 = cudf::data_type{cudf::type_to_id(), -202}; - auto const result1 = cudf::jit::binary_operation(a, b, cudf::binary_operator::ADD, type1); + auto const result1 = cudf::binary_operation(a, b, cudf::binary_operator::ADD, type1); auto const type2 = cudf::data_type{cudf::type_to_id(), -201}; - auto const result2 = cudf::jit::binary_operation(a, c, cudf::binary_operator::DIV, type2); + auto const result2 = cudf::binary_operation(a, c, cudf::binary_operator::DIV, type2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); From 87151969f407f5179abb1124c26b4005ce329e11 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 27 Aug 2021 16:35:19 +0000 Subject: [PATCH 044/112] Enhance GROUPBY_TEST for decimal128 --- cpp/tests/groupby/max_tests.cpp | 14 +++++++--- cpp/tests/groupby/min_tests.cpp | 14 +++++++--- cpp/tests/groupby/sum_scan_tests.cpp | 41 ++++++++++++++++++++++------ cpp/tests/groupby/sum_tests.cpp | 23 ++++++++++------ 4 files changed, 68 insertions(+), 24 deletions(-) diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index 0f2ebfe7788..44dac359935 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -228,13 +228,12 @@ TEST_F(groupby_dictionary_max_test, basic) } template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMaxDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -258,6 +257,13 @@ TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMaxDecimalAsValue) } } +template +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +}; + +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); + TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMaxDecimalAsValue) { using namespace numeric; diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 041ed37d71a..f801104d6ea 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -228,13 +228,12 @@ TEST_F(groupby_dictionary_min_test, basic) } template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMinDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; @@ -257,6 +256,13 @@ TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortMinDecimalAsValue) } } +template +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +}; + +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); + TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMinDecimalAsValue) { using namespace numeric; diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index d5d52bf4272..d62759b2327 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -133,19 +133,19 @@ TYPED_TEST(groupby_sum_scan_test, null_keys_and_values) } template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumScanDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) { using namespace numeric; - using decimalXX = TypeParam; - using RepType = cudf::device_storage_type_t; - using fp_wrapper = fixed_point_column_wrapper; - using out_fp_wrapper = fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = fixed_point_column_wrapper; + using SumType = std::conditional_t, __int128_t, int64_t>; + using out_fp_wrapper = fixed_point_column_wrapper; for (auto const i : {2, 1, 0, -1, -2}) { auto const scale = scale_type{i}; @@ -162,5 +162,30 @@ TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumScanDecimalAsValue) } } +// struct Decimal128Only : public cudf::test::BaseFixture { +// }; + +// TEST_F(Decimal128Only, GroupBySortSumScanDecimalAsValue) +// { +// using namespace numeric; +// using RepType = cudf::device_storage_type_t; +// using fp_wrapper = fixed_point_column_wrapper; +// using out_fp_wrapper = fixed_point_column_wrapper; + +// for (auto const i : {2, 1, 0, -1, -2}) { +// auto const scale = scale_type{i}; +// // clang-format off +// auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; +// auto const vals = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, scale}; + +// auto const expect_keys = key_wrapper {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; +// auto const expect_vals_sum = out_fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale}; +// // clang-format on + +// auto agg2 = cudf::make_sum_aggregation(); +// test_single_scan(keys, vals, expect_keys, expect_vals_sum, std::move(agg2)); +// } +// } + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index ca010d7572a..f81a63b179c 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -157,19 +157,19 @@ TYPED_TEST(groupby_sum_test, dictionary) } template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +struct FixedPointTestAllReps : public cudf::test::BaseFixture { }; -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); +TYPED_TEST_CASE(FixedPointTestAllReps, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) { using namespace numeric; - using decimalXX = TypeParam; - using RepType = cudf::device_storage_type_t; - using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using fp64_wrapper = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using SumType = std::conditional_t, __int128_t, int64_t>; + using fp64_wrapper = cudf::test::fixed_point_column_wrapper; using K = int32_t; for (auto const i : {2, 1, 0, -1, -2}) { @@ -193,6 +193,13 @@ TYPED_TEST(FixedPointTest_32_64_Reps, GroupBySortSumDecimalAsValue) } } +template +struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { +}; + +using RepTypes = ::testing::Types; +TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); + TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashSumDecimalAsValue) { using namespace numeric; From 7952e90ac7df99ad9a09c0a87c022d2809939be2 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 27 Aug 2021 16:42:23 +0000 Subject: [PATCH 045/112] Delete commented out code --- cpp/tests/groupby/sum_scan_tests.cpp | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index d62759b2327..6b813f8b6db 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -162,30 +162,5 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) } } -// struct Decimal128Only : public cudf::test::BaseFixture { -// }; - -// TEST_F(Decimal128Only, GroupBySortSumScanDecimalAsValue) -// { -// using namespace numeric; -// using RepType = cudf::device_storage_type_t; -// using fp_wrapper = fixed_point_column_wrapper; -// using out_fp_wrapper = fixed_point_column_wrapper; - -// for (auto const i : {2, 1, 0, -1, -2}) { -// auto const scale = scale_type{i}; -// // clang-format off -// auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; -// auto const vals = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, scale}; - -// auto const expect_keys = key_wrapper {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; -// auto const expect_vals_sum = out_fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale}; -// // clang-format on - -// auto agg2 = cudf::make_sum_aggregation(); -// test_single_scan(keys, vals, expect_keys, expect_vals_sum, std::move(agg2)); -// } -// } - } // namespace test } // namespace cudf From 10d58a3e5f2f2a334e156af2fae5f194f536c4a1 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 31 Aug 2021 20:12:54 +0000 Subject: [PATCH 046/112] Support hash groupby decimal128 (by making is sort) - initial change --- cpp/src/groupby/hash/groupby.cu | 7 ++++--- cpp/tests/groupby/max_tests.cpp | 9 +-------- cpp/tests/groupby/min_tests.cpp | 9 +-------- cpp/tests/groupby/sum_tests.cpp | 18 ++++++------------ 4 files changed, 12 insertions(+), 31 deletions(-) diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 87f83c6edd6..e94c119596f 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -647,9 +647,10 @@ std::unique_ptr groupby_null_templated(table_view const& keys, bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { - return is_hash_aggregation(a->kind); - }); + return (r.values.type().id() != cudf::type_id::DECIMAL128) and + std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { + return is_hash_aggregation(a->kind); + }); }); } diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index a8fdbef8384..ef72fff45ac 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -281,14 +281,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortMaxDecimalAsValue) } } -template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { -}; - -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); - -TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMaxDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashMaxDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index c47b51b9ce1..044f97c3cac 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -280,14 +280,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortMinDecimalAsValue) } } -template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { -}; - -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); - -TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashMinDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashMinDecimalAsValue) { using namespace numeric; using decimalXX = TypeParam; diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index f81a63b179c..ed42386b694 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -193,20 +193,14 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) } } -template -struct FixedPointTest_32_64_Reps : public cudf::test::BaseFixture { -}; - -using RepTypes = ::testing::Types; -TYPED_TEST_CASE(FixedPointTest_32_64_Reps, RepTypes); - -TYPED_TEST(FixedPointTest_32_64_Reps, GroupByHashSumDecimalAsValue) +TYPED_TEST(FixedPointTestAllReps, GroupByHashSumDecimalAsValue) { using namespace numeric; - using decimalXX = TypeParam; - using RepType = cudf::device_storage_type_t; - using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using fp64_wrapper = cudf::test::fixed_point_column_wrapper; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using SumType = std::conditional_t, __int128_t, int64_t>; + using fp64_wrapper = cudf::test::fixed_point_column_wrapper; using K = int32_t; for (auto const i : {2, 1, 0, -1, -2}) { From 60ce655d4e53162b7b7ad2b9c078a18e736dd001 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 31 Aug 2021 23:28:05 +0000 Subject: [PATCH 047/112] has_atomic_support --- cpp/src/groupby/hash/groupby.cu | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index e94c119596f..ede631d2f54 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -50,6 +50,8 @@ #include #include +#include + namespace cudf { namespace groupby { namespace detail { @@ -634,6 +636,20 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, } // namespace +// TODO move this to more appropriate file +struct has_atomic_support_type_dispatcher { + template + bool operator()() + { + return cuda::std::atomic::is_always_lock_free; + } +}; + +bool has_atomic_support(cudf::data_type const& type) +{ + return type_dispatcher(type, has_atomic_support_type_dispatcher{}); +} + /** * @brief Indicates if a set of aggregation requests can be satisfied with a * hash-based groupby implementation. @@ -647,7 +663,7 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return (r.values.type().id() != cudf::type_id::DECIMAL128) and + return has_atomic_support(r.values.type()) and std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { return is_hash_aggregation(a->kind); }); From 28aca7d8e6e544da9ffc37173b1a8e5fb6c20db0 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 1 Sep 2021 19:09:07 +0000 Subject: [PATCH 048/112] TEMPORARY - will revert later --- .../Modules/JitifyPreprocessKernels.cmake | 3 +- cpp/cmake/thirdparty/CUDF_GetLibcudacxx.cmake | 4 +- cpp/src/binaryop/binaryop.cpp | 190 +++++++++--------- 3 files changed, 99 insertions(+), 98 deletions(-) diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 7e2ec5254d3..e854f4fa1a3 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -55,8 +55,7 @@ function(jit_preprocess_files) endfunction() jit_preprocess_files(SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src - FILES binaryop/jit/kernel.cu - transform/jit/masked_udf_kernel.cu + FILES transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu ) diff --git a/cpp/cmake/thirdparty/CUDF_GetLibcudacxx.cmake b/cpp/cmake/thirdparty/CUDF_GetLibcudacxx.cmake index 63d6d26802c..ef5db9ca91d 100644 --- a/cpp/cmake/thirdparty/CUDF_GetLibcudacxx.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetLibcudacxx.cmake @@ -17,8 +17,8 @@ function(find_and_configure_libcudacxx VERSION) CPMFindPackage(NAME libcudacxx VERSION ${VERSION} - GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git - GIT_TAG ${VERSION} + GIT_REPOSITORY https://gitlab-master.nvidia.com/nvhpc/libcudacxx.git + GIT_TAG staging/1.6.0 GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE ) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 785fba0b1a9..15c1747dc0e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -20,7 +20,7 @@ #include "compiled/binary_ops.hpp" #include "jit/util.hpp" -#include +// #include #include #include @@ -134,41 +134,42 @@ void binary_operation(mutable_column_view& out, OperatorType op_type, rmm::cuda_stream_view stream) { - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - lhs.offset(), - rhs.is_valid()); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } + // if (is_null_dependent(op)) { + // std::string kernel_name = + // jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // + // .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + // cudf::jit::get_type_name(lhs.type()), + // cudf::jit::get_type_name(rhs.type()), + // get_operator_name(op, op_type)); + + // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + // ->launch(out.size(), + // cudf::jit::get_data_ptr(out), + // cudf::jit::get_data_ptr(lhs), + // cudf::jit::get_data_ptr(rhs), + // out.null_mask(), + // lhs.null_mask(), + // lhs.offset(), + // rhs.is_valid()); + // } else { + // std::string kernel_name = + // jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // + // .instantiate(cudf::jit::get_type_name(out.type()), // list of template + // arguments + // cudf::jit::get_type_name(lhs.type()), + // cudf::jit::get_type_name(rhs.type()), + // get_operator_name(op, op_type)); + + // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + // ->launch(out.size(), + // cudf::jit::get_data_ptr(out), + // cudf::jit::get_data_ptr(lhs), + // cudf::jit::get_data_ptr(rhs)); + // } } void binary_operation(mutable_column_view& out, @@ -195,42 +196,43 @@ void binary_operation(mutable_column_view& out, binary_operator op, rmm::cuda_stream_view stream) { - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - rhs.offset(), - rhs.null_mask(), - rhs.offset()); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } + // if (is_null_dependent(op)) { + // std::string kernel_name = + // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // + // .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + // cudf::jit::get_type_name(lhs.type()), + // cudf::jit::get_type_name(rhs.type()), + // get_operator_name(op, OperatorType::Direct)); + + // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + // ->launch(out.size(), + // cudf::jit::get_data_ptr(out), + // cudf::jit::get_data_ptr(lhs), + // cudf::jit::get_data_ptr(rhs), + // out.null_mask(), + // lhs.null_mask(), + // rhs.offset(), + // rhs.null_mask(), + // rhs.offset()); + // } else { + // std::string kernel_name = + // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + // .instantiate(cudf::jit::get_type_name(out.type()), // list of template + // arguments + // cudf::jit::get_type_name(lhs.type()), + // cudf::jit::get_type_name(rhs.type()), + // get_operator_name(op, OperatorType::Direct)); + + // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + // ->launch(out.size(), + // cudf::jit::get_data_ptr(out), + // cudf::jit::get_data_ptr(lhs), + // cudf::jit::get_data_ptr(rhs)); + // } } void binary_operation(mutable_column_view& out, @@ -239,28 +241,28 @@ void binary_operation(mutable_column_view& out, const std::string& ptx, rmm::cuda_stream_view stream) { - std::string const output_type_name = cudf::jit::get_type_name(out.type()); - - std::string ptx_hash = - "prog_binop." + std::to_string(std::hash{}(ptx + output_type_name)); - std::string cuda_source = - cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); - - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - .instantiate(output_type_name, // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel( - kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); + // std::string const output_type_name = cudf::jit::get_type_name(out.type()); + + // std::string ptx_hash = + // "prog_binop." + std::to_string(std::hash{}(ptx + output_type_name)); + // std::string cuda_source = + // cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); + + // std::string kernel_name = + // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + // .instantiate(output_type_name, // list of template arguments + // cudf::jit::get_type_name(lhs.type()), + // cudf::jit::get_type_name(rhs.type()), + // get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); + + // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + // .get_kernel( + // kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // + // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + // ->launch(out.size(), + // cudf::jit::get_data_ptr(out), + // cudf::jit::get_data_ptr(lhs), + // cudf::jit::get_data_ptr(rhs)); } } // namespace jit From fe446a427e5c50b30b9c37781553f0190630ce4a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 9 Sep 2021 01:36:09 +0000 Subject: [PATCH 049/112] Block group_by mean for decimal types --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index bcf9fa386d5..e5d62ef584f 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -951,10 +951,12 @@ struct target_type_impl { // Except for chrono types where result is chrono. (Use FloorDiv) // TODO: MEAN should be only be enabled for duration types - not for timestamps template -struct target_type_impl< - Source, - k, - std::enable_if_t() && !is_chrono() && (k == aggregation::MEAN)>> { +struct target_type_impl() // + and not is_chrono() // + and not is_fixed_point // + and (k == aggregation::MEAN)>> { using type = double; }; From efd0b62534260a2f6b0f42f990b7591b91274643 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 9 Sep 2021 05:17:15 +0000 Subject: [PATCH 050/112] Revert non-comprehensive fix --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 8745228a519..1d6399d4d00 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -953,10 +953,8 @@ struct target_type_impl { template struct target_type_impl() // - and not is_chrono() // - and not is_fixed_point // - and (k == aggregation::MEAN)>> { + std::enable_if_t() and not is_chrono() and + (k == aggregation::MEAN)>> { using type = double; }; From 5622a842c426944838784b8d9f36912306e8491e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 17 Sep 2021 22:16:45 +0000 Subject: [PATCH 051/112] binary op changes --- .../Modules/JitifyPreprocessKernels.cmake | 3 +- cpp/src/binaryop/binaryop.cpp | 437 ++++-------------- cpp/tests/binaryop/binop-integration-test.cpp | 83 ++-- 3 files changed, 143 insertions(+), 380 deletions(-) diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index e854f4fa1a3..7e2ec5254d3 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -55,7 +55,8 @@ function(jit_preprocess_files) endfunction() jit_preprocess_files(SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src - FILES transform/jit/masked_udf_kernel.cu + FILES binaryop/jit/kernel.cu + transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu ) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 15c1747dc0e..486ace3424f 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -20,7 +20,7 @@ #include "compiled/binary_ops.hpp" #include "jit/util.hpp" -// #include +#include #include #include @@ -134,42 +134,41 @@ void binary_operation(mutable_column_view& out, OperatorType op_type, rmm::cuda_stream_view stream) { - // if (is_null_dependent(op)) { - // std::string kernel_name = - // jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // - // .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - // cudf::jit::get_type_name(lhs.type()), - // cudf::jit::get_type_name(rhs.type()), - // get_operator_name(op, op_type)); - - // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - // ->launch(out.size(), - // cudf::jit::get_data_ptr(out), - // cudf::jit::get_data_ptr(lhs), - // cudf::jit::get_data_ptr(rhs), - // out.null_mask(), - // lhs.null_mask(), - // lhs.offset(), - // rhs.is_valid()); - // } else { - // std::string kernel_name = - // jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // - // .instantiate(cudf::jit::get_type_name(out.type()), // list of template - // arguments - // cudf::jit::get_type_name(lhs.type()), - // cudf::jit::get_type_name(rhs.type()), - // get_operator_name(op, op_type)); - - // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - // ->launch(out.size(), - // cudf::jit::get_data_ptr(out), - // cudf::jit::get_data_ptr(lhs), - // cudf::jit::get_data_ptr(rhs)); - // } + if (is_null_dependent(op)) { + std::string kernel_name = + jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // + .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(op, op_type)); + + cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs), + out.null_mask(), + lhs.null_mask(), + lhs.offset(), + rhs.is_valid()); + } else { + std::string kernel_name = + jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // + .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(op, op_type)); + + cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs)); + } } void binary_operation(mutable_column_view& out, @@ -196,43 +195,42 @@ void binary_operation(mutable_column_view& out, binary_operator op, rmm::cuda_stream_view stream) { - // if (is_null_dependent(op)) { - // std::string kernel_name = - // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // - // .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - // cudf::jit::get_type_name(lhs.type()), - // cudf::jit::get_type_name(rhs.type()), - // get_operator_name(op, OperatorType::Direct)); - - // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - // ->launch(out.size(), - // cudf::jit::get_data_ptr(out), - // cudf::jit::get_data_ptr(lhs), - // cudf::jit::get_data_ptr(rhs), - // out.null_mask(), - // lhs.null_mask(), - // rhs.offset(), - // rhs.null_mask(), - // rhs.offset()); - // } else { - // std::string kernel_name = - // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - // .instantiate(cudf::jit::get_type_name(out.type()), // list of template - // arguments - // cudf::jit::get_type_name(lhs.type()), - // cudf::jit::get_type_name(rhs.type()), - // get_operator_name(op, OperatorType::Direct)); - - // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - // .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - // ->launch(out.size(), - // cudf::jit::get_data_ptr(out), - // cudf::jit::get_data_ptr(lhs), - // cudf::jit::get_data_ptr(rhs)); - // } + if (is_null_dependent(op)) { + std::string kernel_name = + jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // + .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(op, OperatorType::Direct)); + + cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs), + out.null_mask(), + lhs.null_mask(), + rhs.offset(), + rhs.null_mask(), + rhs.offset()); + } else { + std::string kernel_name = + jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(op, OperatorType::Direct)); + + cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs)); + } } void binary_operation(mutable_column_view& out, @@ -241,28 +239,28 @@ void binary_operation(mutable_column_view& out, const std::string& ptx, rmm::cuda_stream_view stream) { - // std::string const output_type_name = cudf::jit::get_type_name(out.type()); - - // std::string ptx_hash = - // "prog_binop." + std::to_string(std::hash{}(ptx + output_type_name)); - // std::string cuda_source = - // cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); - - // std::string kernel_name = - // jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - // .instantiate(output_type_name, // list of template arguments - // cudf::jit::get_type_name(lhs.type()), - // cudf::jit::get_type_name(rhs.type()), - // get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); - - // cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - // .get_kernel( - // kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // - // ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - // ->launch(out.size(), - // cudf::jit::get_data_ptr(out), - // cudf::jit::get_data_ptr(lhs), - // cudf::jit::get_data_ptr(rhs)); + std::string const output_type_name = cudf::jit::get_type_name(out.type()); + + std::string ptx_hash = + "prog_binop." + std::to_string(std::hash{}(ptx + output_type_name)); + std::string cuda_source = + cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); + + std::string kernel_name = + jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + .instantiate(output_type_name, // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); + + cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) + .get_kernel( + kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + ->launch(out.size(), + cudf::jit::get_data_ptr(out), + cudf::jit::get_data_ptr(lhs), + cudf::jit::get_data_ptr(rhs)); } } // namespace jit @@ -422,241 +420,6 @@ void fixed_point_binary_operation_validation(binary_operator op, } namespace jit { -/** - * @brief Function to compute binary operation of one `column_view` and one `scalar` - * - * @param lhs Left-hand side `scalar` used in the binary operation - * @param rhs Right-hand side `column_view` used in the binary operation - * @param op `binary_operator` to be used to combine `lhs` and `rhs` - * @param mr Device memory resource to use for device memory allocation - * @param stream CUDA stream used for device memory operations - * @return std::unique_ptr Resulting output column from the binary operation - */ -std::unique_ptr fixed_point_binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - cudf::data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - using namespace numeric; - - fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - - if (rhs.is_empty()) - return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); - auto const type = binops::is_comparison_binop(op) ? data_type{type_id::BOOL8} - : cudf::data_type{rhs.type().id(), scale}; - auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); - auto out_view = out->mutable_view(); - - if (lhs.type().scale() != rhs.type().scale() && binops::is_same_scale_necessary(op)) { - // Adjust scalar/column so they have they same scale - if (rhs.type().scale() < lhs.type().scale()) { - auto const diff = lhs.type().scale() - rhs.type().scale(); - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const val = static_cast const&>(lhs).value(); - auto const scale = scale_type{rhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - } else if (lhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const val = static_cast const&>(lhs).value(); - auto const scale = scale_type{rhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const val = static_cast const&>(lhs).value(); - auto const scale = scale_type{rhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, *scalar, rhs, op, stream); - } - } else { - auto const diff = rhs.type().scale() - lhs.type().scale(); - auto const result = [&] { - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else if (lhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } - }(); - binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); - } - } else { - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - } - return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); -} - -/** - * @brief Function to compute binary operation of one `column_view` and one `scalar` - * - * @param lhs Left-hand side `column_view` used in the binary operation - * @param rhs Right-hand side `scalar` used in the binary operation - * @param op `binary_operator` to be used to combine `lhs` and `rhs` - * @param mr Device memory resource to use for device memory allocation - * @param stream CUDA stream used for device memory operations - * @return std::unique_ptr Resulting output column from the binary operation - */ -std::unique_ptr fixed_point_binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - cudf::data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - using namespace numeric; - - fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - - if (lhs.is_empty()) - return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); - auto const type = binops::is_comparison_binop(op) ? data_type{type_id::BOOL8} - : cudf::data_type{lhs.type().id(), scale}; - auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); - auto out_view = out->mutable_view(); - - if (lhs.type().scale() != rhs.type().scale() && binops::is_same_scale_necessary(op)) { - // Adjust scalar/column so they have they same scale - if (rhs.type().scale() > lhs.type().scale()) { - auto const diff = rhs.type().scale() - lhs.type().scale(); - if (rhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const val = static_cast const&>(rhs).value(); - auto const scale = scale_type{lhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - } else if (rhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const val = static_cast const&>(rhs).value(); - auto const scale = scale_type{rhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - } else { - CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const val = static_cast const&>(rhs).value(); - auto const scale = scale_type{rhs.type().scale()}; - auto const scalar = make_fixed_point_scalar(val * factor, scale); - binops::jit::binary_operation(out_view, lhs, *scalar, op, stream); - } - } else { - auto const diff = lhs.type().scale() - rhs.type().scale(); - auto const result = [&] { - if (rhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else if (rhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else { - CUDF_EXPECTS(rhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } - }(); - binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); - } - } else { - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - } - return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); -} - -/** - * @brief Function to compute binary operation of two `column_view`s - * - * @param lhs Left-hand side `column_view` used in the binary operation - * @param rhs Right-hand side `column_view` used in the binary operation - * @param op `binary_operator` to be used to combine `lhs` and `rhs` - * @param mr Device memory resource to use for device memory allocation - * @param stream CUDA stream used for device memory operations - * @return std::unique_ptr Resulting output column from the binary operation - */ -std::unique_ptr fixed_point_binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - cudf::data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - using namespace numeric; - - fixed_point_binary_operation_validation(op, lhs.type(), rhs.type(), output_type); - - if (lhs.is_empty() or rhs.is_empty()) - return make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - auto const scale = binary_operation_fixed_point_scale(op, lhs.type().scale(), rhs.type().scale()); - auto const type = binops::is_comparison_binop(op) ? data_type{type_id::BOOL8} - : cudf::data_type{lhs.type().id(), scale}; - auto out = make_fixed_width_column_for_output(lhs, rhs, op, type, stream, mr); - auto out_view = out->mutable_view(); - - if (lhs.type().scale() != rhs.type().scale() && binops::is_same_scale_necessary(op)) { - if (rhs.type().scale() < lhs.type().scale()) { - auto const diff = lhs.type().scale() - rhs.type().scale(); - auto const result = [&] { - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else if (lhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, lhs, binary_operator::MUL, rhs.type(), stream, mr); - } - }(); - binops::jit::binary_operation(out_view, result->view(), rhs, op, stream); - } else { - auto const diff = rhs.type().scale() - lhs.type().scale(); - auto const result = [&] { - if (lhs.type().id() == type_id::DECIMAL32) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else if (lhs.type().id() == type_id::DECIMAL64) { - auto const factor = numeric::detail::ipow(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } else { - CUDF_EXPECTS(lhs.type().id() == type_id::DECIMAL128, "Unexpected DTYPE"); - auto const factor = numeric::detail::ipow<__int128_t, Radix::BASE_10>(diff); - auto const scalar = make_fixed_point_scalar(factor, scale_type{-diff}); - return jit::binary_operation(*scalar, rhs, binary_operator::MUL, lhs.type(), stream, mr); - } - }(); - binops::jit::binary_operation(out_view, lhs, result->view(), op, stream); - } - } else { - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - } - return output_type.scale() != scale ? cudf::cast(out_view, output_type) : std::move(out); -} std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, @@ -669,8 +432,8 @@ std::unique_ptr binary_operation(scalar const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) - return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); + // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) TODO + // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); @@ -697,8 +460,8 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) - return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); + // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) + // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); @@ -727,8 +490,8 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) - return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); + // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) // TODO + // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 0c546424d0b..a0847f3eff2 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2053,7 +2053,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); } @@ -2086,7 +2086,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); } @@ -2108,7 +2108,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply2) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2127,7 +2127,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2146,7 +2146,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv2) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2163,7 +2163,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv3) auto const type = cudf::binary_operation_fixed_point_output_type( cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2183,7 +2183,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv4) auto const type = cudf::binary_operation_fixed_point_output_type( cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2202,7 +2202,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd2) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2221,7 +2221,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd3) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2238,7 +2238,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd4) auto const type = cudf::binary_operation_fixed_point_output_type( cudf::binary_operator::ADD, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2255,7 +2255,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd5) auto const type = cudf::binary_operation_fixed_point_output_type( cudf::binary_operator::ADD, lhs->type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2272,8 +2272,8 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd6) auto const expected2 = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; auto const type1 = cudf::data_type{cudf::type_to_id(), 0}; auto const type2 = cudf::data_type{cudf::type_to_id(), 1}; - auto const result1 = cudf::jit::binary_operation(col, col, cudf::binary_operator::ADD, type1); - auto const result2 = cudf::jit::binary_operation(col, col, cudf::binary_operator::ADD, type2); + auto const result1 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type1); + auto const result2 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); @@ -2305,7 +2305,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiplyScalar) auto const type = cudf::binary_operation_fixed_point_output_type( cudf::binary_operator::MUL, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2324,7 +2324,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpSimplePlus) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, static_cast(lhs).type(), static_cast(rhs).type()); - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2340,8 +2340,8 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimple) auto const col2 = fp_wrapper{{100, 200, 300, 400}, scale_type{-2}}; auto const expected = wrapper(trues.begin(), trues.end()); - auto const result = cudf::jit::binary_operation( - col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2357,7 +2357,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0) auto const expected = wrapper(trues.begin(), trues.end()); auto const result = - cudf::jit::binary_operation(col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + cudf::binary_operation(col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2372,8 +2372,8 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0Null) auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - auto const result = cudf::jit::binary_operation( - col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2388,8 +2388,8 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale2Null) auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - auto const result = cudf::jit::binary_operation( - col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + auto const result = + cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2415,8 +2415,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualLessGreater) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, static_cast(iota_3).type(), static_cast(zeros_3).type()); - auto const iota_3_after_add = - cudf::jit::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); + auto const iota_3_after_add = cudf::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view()); @@ -2427,15 +2426,15 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualLessGreater) auto const btype = cudf::data_type{type_id::BOOL8}; auto const equal_result = - cudf::jit::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); + cudf::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view()); auto const less_result = - cudf::jit::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); + cudf::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view()); auto const greater_result = - cudf::jit::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); + cudf::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); } @@ -2454,7 +2453,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMaxSimple) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX, static_cast(col1).type(), static_cast(col2).type()); - auto const result = cudf::jit::binary_operation(col1, col2, binary_operator::NULL_MAX, type); + auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MAX, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2474,7 +2473,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMinSimple) cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN, static_cast(col1).type(), static_cast(col2).type()); - auto const result = cudf::jit::binary_operation(col1, col2, binary_operator::NULL_MIN, type); + auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MIN, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2490,7 +2489,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullEqualsSimple) auto const col2 = fp_wrapper{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}}; auto const expected = wrapper{{1, 0, 0, 1}, {1, 1, 1, 1}}; - auto const result = cudf::jit::binary_operation( + auto const result = cudf::binary_operation( col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); @@ -2507,7 +2506,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div) auto const expected = fp_wrapper{{25, 75, 125, 175}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2523,7 +2522,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div2) auto const expected = fp_wrapper{{5000, 15000, 25000, 35000}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2539,7 +2538,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div3) auto const expected = fp_wrapper{{3333, 3333, 16666, 23333}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2555,7 +2554,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div4) auto const expected = fp_wrapper{{3, 10, 16, 23}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2572,7 +2571,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div6) auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::jit::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2589,7 +2588,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div7) auto const expected = fp_wrapper{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}}; auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::jit::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2605,7 +2604,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div8) auto const expected = fp_wrapper{{0, 1, 16}, scale_type{2}}; auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2621,7 +2620,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div9) auto const expected = fp_wrapper{{1, 2, 4}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2637,7 +2636,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div10) auto const expected = fp_wrapper{{14, 28, 42}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::jit::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2653,7 +2652,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div11) auto const expected = fp_wrapper{{142, 285, 428}, scale_type{1}}; auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::jit::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -2667,9 +2666,9 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; auto const non_bool_type = data_type{type_to_id(), -2}; auto const float_type = data_type{type_id::FLOAT32}; - EXPECT_THROW(cudf::jit::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), + EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), cudf::logic_error); - EXPECT_THROW(cudf::jit::binary_operation(col, col, cudf::binary_operator::MUL, float_type), + EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::MUL, float_type), cudf::logic_error); } From 5ebd1bbef5e8a41aa1d3f623639bb90fd624eb20 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 17 Sep 2021 23:33:46 +0000 Subject: [PATCH 052/112] add checks to jit binary op --- cpp/src/binaryop/binaryop.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 486ace3424f..97716e3b07f 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -432,11 +432,10 @@ std::unique_ptr binary_operation(scalar const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) TODO - // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); - // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); + CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); + CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); @@ -460,11 +459,10 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) - // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); - // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); + CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); + CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); @@ -490,11 +488,10 @@ std::unique_ptr binary_operation(column_view const& lhs, if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) // TODO - // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); - // Check for datatype CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); + CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); + CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); From cb4e38910edc512fd1971783f947b2c2574584e3 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 21 Sep 2021 04:12:14 +0000 Subject: [PATCH 053/112] Final changes for binary ops --- cpp/src/binaryop/binaryop.cpp | 50 ++++++++++++---------- cpp/tests/binaryop/binop-compiled-test.cpp | 26 ++++------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 97716e3b07f..9c8817a6b4d 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -266,6 +266,28 @@ void binary_operation(mutable_column_view& out, // Compiled Binary operation namespace compiled { + +template +void fixed_point_binary_operation_validation(binary_operator op, + Lhs lhs, + Rhs rhs, + thrust::optional output_type = {}) +{ + CUDF_EXPECTS(is_fixed_point(lhs), "Input must have fixed_point data_type."); + CUDF_EXPECTS(is_fixed_point(rhs), "Input must have fixed_point data_type."); + CUDF_EXPECTS(binops::is_supported_fixed_point_binop(op), + "Unsupported fixed_point binary operation"); + CUDF_EXPECTS(lhs.id() == rhs.id(), "Data type mismatch"); + if (output_type.has_value()) { + if (binops::is_comparison_binop(op)) + CUDF_EXPECTS(output_type == cudf::data_type{type_id::BOOL8}, + "Comparison operations require boolean output type."); + else + CUDF_EXPECTS(is_fixed_point(output_type.value()), + "fixed_point binary operations require fixed_point output type."); + } +} + /** * @copydoc cudf::binary_operation(column_view const&, column_view const&, * binary_operator, data_type, rmm::mr::device_memory_resource*) @@ -291,6 +313,11 @@ std::unique_ptr binary_operation(LhsType const& lhs, if (not cudf::binops::compiled::is_supported_operation(output_type, lhs.type(), rhs.type(), op)) CUDF_FAIL("Unsupported operator for these types"); + if (cudf::is_fixed_point(lhs.type()) or cudf::is_fixed_point(rhs.type())) { + cudf::binops::compiled::fixed_point_binary_operation_validation( + op, lhs.type(), rhs.type(), output_type); + } + auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); if constexpr (std::is_same_v) @@ -398,27 +425,6 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh } }; -template -void fixed_point_binary_operation_validation(binary_operator op, - Lhs lhs, - Rhs rhs, - thrust::optional output_type = {}) -{ - CUDF_EXPECTS(is_fixed_point(lhs), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_fixed_point(rhs), "Input must have fixed_point data_type."); - CUDF_EXPECTS(binops::is_supported_fixed_point_binop(op), - "Unsupported fixed_point binary operation"); - CUDF_EXPECTS(lhs.id() == rhs.id(), "Data type mismatch"); - if (output_type.has_value()) { - if (binops::is_comparison_binop(op)) - CUDF_EXPECTS(output_type == cudf::data_type{type_id::BOOL8}, - "Comparison operations require boolean output type."); - else - CUDF_EXPECTS(is_fixed_point(output_type.value()), - "fixed_point binary operations require fixed_point output type."); - } -} - namespace jit { std::unique_ptr binary_operation(scalar const& lhs, @@ -617,7 +623,7 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, cudf::data_type const& lhs, cudf::data_type const& rhs) { - cudf::detail::fixed_point_binary_operation_validation(op, lhs, rhs); + cudf::binops::compiled::fixed_point_binary_operation_validation(op, lhs, rhs); auto const scale = binary_operation_fixed_point_scale(op, lhs.scale(), rhs.scale()); return cudf::data_type{lhs.id(), scale}; diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 25d2f1d2c24..8fa82a07db7 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -115,7 +115,6 @@ using Add_types = cudf::test::Types, // cudf::test::Types, //valid cudf::test::Types, - cudf::test::Types, // Extras cudf::test::Types, cudf::test::Types, @@ -144,8 +143,7 @@ using Sub_types = cudf::test::Types, // t - d cudf::test::Types, // d - d cudf::test::Types, // d - d - cudf::test::Types, - cudf::test::Types>; + cudf::test::Types>; template struct BinaryOperationCompiledTest_Sub : public BinaryOperationCompiledTest { }; @@ -166,9 +164,7 @@ using Mul_types = cudf::test::Types, cudf::test::Types, cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; + cudf::test::Types>; template struct BinaryOperationCompiledTest_Mul : public BinaryOperationCompiledTest { }; @@ -192,8 +188,7 @@ using Div_types = cudf::test::Types, cudf::test::Types, cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; + cudf::test::Types>; template struct BinaryOperationCompiledTest_Div : public BinaryOperationCompiledTest { }; @@ -209,13 +204,10 @@ TYPED_TEST(BinaryOperationCompiledTest_Div, Vector_Vector) // n n / n // t // d -using TrueDiv_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using TrueDiv_types = cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_TrueDiv : public BinaryOperationCompiledTest { }; @@ -519,9 +511,7 @@ using Null_types = cudf::test::Types, cudf::test::Types, // cudf::test::Types, // only fixed-width - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; + cudf::test::Types>; template struct BinaryOperationCompiledTest_NullOps : public BinaryOperationCompiledTest { From 4c81f57fd36309eecf8d286ee42428931d5cc0de Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 21 Sep 2021 04:22:28 +0000 Subject: [PATCH 054/112] Add more binop tests --- cpp/tests/binaryop/binop-compiled-test.cpp | 102 ++++++++++++--------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 8fa82a07db7..206b0252abf 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -107,20 +107,22 @@ struct BinaryOperationCompiledTest : public BinaryOperationTest { // t t + d // d d + t d + d -using Add_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - // cudf::test::Types, //valid - cudf::test::Types, - // Extras - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Add_types = cudf::test::Types< + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // cudf::test::Types, //valid + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // Extras + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Add : public BinaryOperationCompiledTest { }; @@ -137,13 +139,15 @@ TYPED_TEST(BinaryOperationCompiledTest_Add, Vector_Vector) // t t - t t - d // d d - d -using Sub_types = - cudf::test::Types, // n - n - cudf::test::Types, // t - t - cudf::test::Types, // t - d - cudf::test::Types, // d - d - cudf::test::Types, // d - d - cudf::test::Types>; +using Sub_types = cudf::test::Types< + cudf::test::Types, // n - n + cudf::test::Types, // t - t + cudf::test::Types, // t - d + cudf::test::Types, // d - d + cudf::test::Types, // d - d + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Sub : public BinaryOperationCompiledTest { }; @@ -159,12 +163,14 @@ TYPED_TEST(BinaryOperationCompiledTest_Sub, Vector_Vector) // n n * n n * d // t // d d * n -using Mul_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Mul_types = cudf::test::Types< + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Mul : public BinaryOperationCompiledTest { }; @@ -180,15 +186,17 @@ TYPED_TEST(BinaryOperationCompiledTest_Mul, Vector_Vector) // n n / n // t // d d / n d / d -using Div_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Div_types = cudf::test::Types< + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Div : public BinaryOperationCompiledTest { }; @@ -459,7 +467,9 @@ using Comparison_types = cudf::test::Types, cudf::test::Types, cudf::test::Types, - cudf::test::Types>; + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Comparison : public BinaryOperationCompiledTest { @@ -504,14 +514,16 @@ TYPED_TEST(BinaryOperationCompiledTest_Comparison, GreaterEqual_Vector_Vector) // d . // s . // dc . . -using Null_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - // cudf::test::Types, // only fixed-width - cudf::test::Types>; +using Null_types = cudf::test::Types< + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // cudf::test::Types, // only fixed-width + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_NullOps : public BinaryOperationCompiledTest { From 58b23cd2c46dbccc72fb0300ba1c45b1d5033b6d Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 22 Sep 2021 05:33:30 +0000 Subject: [PATCH 055/112] Temporary fix for chrono groupby min_tests --- cpp/include/cudf/detail/utilities/device_operators.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index f8792061612..1ebecdf1a83 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -99,6 +99,7 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { + if constexpr (cudf::is_chrono()) return std::numeric_limits::max(); return cuda::std::numeric_limits::max(); } From 1f3284fd37851fef70da3776fa9924de4221b8b1 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 22 Sep 2021 15:42:07 +0000 Subject: [PATCH 056/112] decimal128 comparision tests --- cpp/tests/binaryop/binop-integration-test.cpp | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index a0847f3eff2..d2c39454f90 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2672,6 +2672,38 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) cudf::logic_error); } +template +struct FixedPointTest_64_128_Reps : public cudf::test::BaseFixture { +}; + +using Decimal64And128Types = cudf::test::Types; +TYPED_TEST_CASE(FixedPointTest_64_128_Reps, Decimal64And128Types); + +TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + for (auto const rhs_value : {10000000000000000, 100000000000000000}) { + auto const lhs = fp_wrapper{{33041, 97290, 36438, 25379, 48473}, scale_type{2}}; + auto const rhs = make_fixed_point_scalar(rhs_value, scale_type{0}); + auto const trues = wrapper{{1, 1, 1, 1, 1}}; + auto const falses = wrapper{{0, 0, 0, 0, 0}}; + auto const bool_type = cudf::data_type{type_id::BOOL8}; + + auto const a = cudf::binary_operation(lhs, *rhs, binary_operator::LESS, bool_type); + auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); + auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); + auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, a->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, b->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, c->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, d->view()); + } +} + } // namespace binop } // namespace test } // namespace cudf From 7713bc4aeac00c8ac02f8c431a24fd950e3a0531 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 22 Sep 2021 15:59:00 +0000 Subject: [PATCH 057/112] Enhance decimal128 comparison tests --- cpp/tests/binaryop/binop-integration-test.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index d2c39454f90..495b6c09698 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2696,11 +2696,19 @@ TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); + auto const e = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER, bool_type); + auto const f = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER_EQUAL, bool_type); + auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); + auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, a->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, b->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, c->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, d->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, e->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, f->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, g->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, h->view()); } } From 2de00b8b3acd8c2fe6cc38df39b85c195c613e81 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 22 Sep 2021 16:15:41 +0000 Subject: [PATCH 058/112] small cleanup --- cpp/tests/binaryop/binop-integration-test.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 495b6c09698..6b7d8ead299 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2701,14 +2701,14 @@ TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, a->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, b->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, c->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, d->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, e->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(trues, f->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, g->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(falses, h->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(a->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(b->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(c->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(e->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(f->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(g->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); } } From ea36188cc65ecebfcec41af22b0bbeebc04b3229 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 23 Sep 2021 02:21:14 +0000 Subject: [PATCH 059/112] cleanup --- cpp/src/aggregation/aggregation.cu | 3 +-- cpp/src/groupby/hash/groupby.cu | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu index b4d4b99b87a..02998b84ffd 100644 --- a/cpp/src/aggregation/aggregation.cu +++ b/cpp/src/aggregation/aggregation.cu @@ -28,8 +28,7 @@ void initialize_with_identity(mutable_table_view& table, // kernel per column for (size_type i = 0; i < table.num_columns(); ++i) { auto col = table.column(i); - dispatch_type_and_aggregation( - col.type(), aggs[i], identity_initializer{}, col, stream); // TODO SFINAE for decimal + dispatch_type_and_aggregation(col.type(), aggs[i], identity_initializer{}, col, stream); } } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 2260b39b3c7..77b23e06a09 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -628,7 +628,6 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, } // namespace -// TODO move this to more appropriate file struct has_atomic_support_type_dispatcher { template bool operator()() @@ -637,6 +636,12 @@ struct has_atomic_support_type_dispatcher { } }; +/** + * @brief Indicates whether `type` has support for atomics + * + * @param type The `data_type` that is being checked + * @return `true` if `type` has support for atomics, `false` otherwise + */ bool has_atomic_support(cudf::data_type const& type) { return type_dispatcher(type, has_atomic_support_type_dispatcher{}); From d093ae82778469b1cead190044b43113cc23e1ce Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 5 Oct 2021 05:17:09 -0500 Subject: [PATCH 060/112] Fix rounding issues with DECIMAL128 --- cpp/src/round/round.cu | 10 +++++----- cpp/tests/round/round_tests.cpp | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 3a6a2beda45..b4472c5b61b 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -46,26 +46,26 @@ inline double __device__ generic_round_half_even(double d) { return rint(d); } inline float __device__ generic_modf(float a, float* b) { return modff(a, b); } inline double __device__ generic_modf(double a, double* b) { return modf(a, b); } -template ::value>* = nullptr> +template ::value>* = nullptr> T __device__ generic_abs(T value) { - return abs(value); + return value < 0 ? -value : value; } -template ::value>* = nullptr> +template ::value>* = nullptr> T __device__ generic_abs(T value) { return value; } -template ::value>* = nullptr> +template ::value>* = nullptr> int16_t __device__ generic_sign(T value) { return value < 0 ? -1 : 1; } // this is needed to suppress warning: pointless comparison of unsigned integer with zero -template ::value>* = nullptr> +template ::value>* = nullptr> int16_t __device__ generic_sign(T) { return 1; diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index b4050625570..4d1f66443c2 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -587,6 +587,21 @@ TEST_F(RoundTests, Int64AtBoundaryHalfUp) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected5, result5->view()); } +TEST_F(RoundTests, FixedPoint128HalfUp) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + auto const input = fp_wrapper{{-160714515306}, scale_type{-13}}; + auto const expected = fp_wrapper{{-16071451531}, scale_type{-12}}; + auto const result = cudf::round(input, 12, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + TEST_F(RoundTests, FixedPointAtBoundaryTestHalfUp) { using namespace numeric; From 7eedaea94ae18681c269ab909fe979ca19fae89a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 6 Oct 2021 15:46:33 +0000 Subject: [PATCH 061/112] Use numeric::detail::abs in round.cu --- cpp/src/round/round.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index b4472c5b61b..36dd2dabd72 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ inline double __device__ generic_modf(double a, double* b) { return modf(a, b); template ::value>* = nullptr> T __device__ generic_abs(T value) { - return value < 0 ? -value : value; + return numeric::detail::abs(value); } template ::value>* = nullptr> From a8109278c20acb96e4eab396a5d7b20530b0dc11 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 20 Oct 2021 16:35:05 -0400 Subject: [PATCH 062/112] Add cuda:: and if constexpr check --- cpp/include/cudf/detail/utilities/device_operators.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 1ebecdf1a83..8867cc9bf68 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -107,7 +107,7 @@ struct DeviceMin { static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMin identity"); - return std::numeric_limits::max(); + return cuda::std::numeric_limits::max(); } // @brief identity specialized for string_view @@ -138,6 +138,7 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { + if constexpr (cudf::is_chrono()) return std::numeric_limits::lowest(); return cuda::std::numeric_limits::lowest(); } @@ -145,7 +146,7 @@ struct DeviceMax { static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMax identity"); - return std::numeric_limits::lowest(); + return cuda::std::numeric_limits::lowest(); } template >* = nullptr> From 9286b43f82d99123ef6ab9f8e4d71cd1aa440881 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 20 Oct 2021 16:47:48 -0400 Subject: [PATCH 063/112] Clang format :) --- cpp/src/binaryop/binaryop.cpp | 2 +- cpp/src/io/json/json_gpu.cu | 8 +++---- cpp/src/io/parquet/reader_impl.cu | 6 ++--- .../strings/convert/convert_fixed_point.cu | 22 +++++++++---------- cpp/tests/binaryop/binop-integration-test.cpp | 4 ++-- cpp/tests/copying/concatenate_tests.cu | 14 ++++++------ cpp/tests/merge/merge_test.cpp | 4 ++-- cpp/tests/reductions/reduction_tests.cpp | 4 ++-- cpp/tests/reductions/scan_tests.cpp | 2 +- cpp/tests/strings/fixed_point_tests.cpp | 6 ++--- cpp/tests/transform/row_bit_count_test.cu | 8 +++---- 11 files changed, 40 insertions(+), 40 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index d9d1866edea..c09962219a9 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -597,7 +597,7 @@ std::unique_ptr binary_operation(column_view const& lhs, auto new_mask = bitmask_and(table_view({lhs, rhs}), stream, mr); auto out = make_fixed_width_column( - output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); + output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); // Check for 0 sized data if (lhs.is_empty() or rhs.is_empty()) return out; diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 9358e7f7709..ec10e32e55d 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -62,9 +62,9 @@ __device__ std::pair limit_range_to_brackets(char cons auto const data_begin = thrust::next(thrust::find_if( thrust::seq, begin, end, [] __device__(auto c) { return c == '[' || c == '{'; })); auto const data_end = thrust::next(thrust::find_if(thrust::seq, - thrust::make_reverse_iterator(end), - thrust::make_reverse_iterator(data_begin), - [](auto c) { return c == ']' || c == '}'; })) + thrust::make_reverse_iterator(end), + thrust::make_reverse_iterator(data_begin), + [](auto c) { return c == ']' || c == '}'; })) .base(); return {data_begin, data_end}; } @@ -565,7 +565,7 @@ __global__ void detect_data_types_kernel( bool is_negative = (*desc.value_begin == '-'); char const* data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+')); cudf::size_type* ptr = cudf::io::gpu::infer_integral_field_counter( - data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]); + data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]); atomicAdd(ptr, 1); } else if (is_like_float( value_len, digit_count, decimal_count, dash_count + plus_count, exponent_count)) { diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 6d4fbcdf4e4..2e606610c4e 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -752,9 +752,9 @@ class aggregate_metadata { // Check if the path exists in our selected_columns and if not, add it. auto const& name_to_find = path[depth]; auto found_col = std::find_if( - array_to_find_in->begin(), - array_to_find_in->end(), - [&name_to_find](column_name_info const& col) { return col.name == name_to_find; }); + array_to_find_in->begin(), + array_to_find_in->end(), + [&name_to_find](column_name_info const& col) { return col.name == name_to_find; }); if (found_col == array_to_find_in->end()) { auto& col = array_to_find_in->emplace_back(name_to_find); array_to_find_in = &col.children; diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 1d94c94f5b3..ba96f8ebe89 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -139,11 +139,11 @@ struct dispatch_to_fixed_point_fn { // create output column auto results = make_fixed_point_column(output_type, - input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), - input.null_count(), - stream, - mr); + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); auto d_results = results->mutable_view().data(); // convert strings into decimal values @@ -211,7 +211,7 @@ struct decimal_to_string_size_fn { auto const abs_value = numeric::detail::abs(value); auto const exp_ten = static_cast(exp10( - static_cast(-scale))); // TODO probably broken (might need numeric::detail::exp10) + static_cast(-scale))); // TODO probably broken (might need numeric::detail::exp10) auto const fraction = count_digits(abs_value % exp_ten); auto const num_zeros = std::max(0, (-scale - fraction)); return static_cast(value < 0) + // sign if negative @@ -349,11 +349,11 @@ struct dispatch_is_fixed_point_fn { // create output column auto results = make_numeric_column(data_type{type_id::BOOL8}, - input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), - input.null_count(), - stream, - mr); + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); auto d_results = results->mutable_view().data(); // check strings for valid fixed-point chars diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 6b7d8ead299..fa3d9d13f0d 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2034,7 +2034,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd) auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return decimalXX{i, scale_type{0}}; - }); + }); auto const vec1 = std::vector(begin, begin + sz); auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); auto expected = std::vector(sz); @@ -2067,7 +2067,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply) auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return decimalXX{i, scale_type{0}}; - }); + }); auto const vec1 = std::vector(begin, begin + sz); auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); auto expected = std::vector(sz); diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index 38a7fa4db58..98ffb121a9d 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -361,7 +361,7 @@ TEST_F(OverflowTest, OverflowTest) auto offsets = cudf::test::fixed_width_column_wrapper{0, size}; auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); auto col = cudf::make_strings_column( - 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); + 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), @@ -376,7 +376,7 @@ TEST_F(OverflowTest, OverflowTest) auto many_offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, size + 1); auto chars = cudf::test::fixed_width_column_wrapper{0, 1, 2}; auto col = cudf::make_strings_column( - size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{}); + size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{}); table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), @@ -484,7 +484,7 @@ TEST_F(OverflowTest, Presliced) cudf::test::fixed_width_column_wrapper offsets(offset_gen, offset_gen + num_rows + 1); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, total_chars_size); auto col = cudf::make_strings_column( - num_rows, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::split(*col, {(num_rows / 2) - 1}); @@ -515,7 +515,7 @@ TEST_F(OverflowTest, Presliced) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, total_chars_size); auto col = cudf::make_strings_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); // should pass (with 2 rows to spare) // leaving this disabled as it typically runs out of memory on a T4 @@ -684,7 +684,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto col = cudf::make_strings_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::slice(*col, {16, 32}); @@ -712,7 +712,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto col = cudf::make_lists_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); auto sliced = cudf::slice(*col, {16, 32}); @@ -740,7 +740,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) offsets->mutable_view().begin()); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, inner_size); auto list_col = cudf::make_lists_column( - num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); + num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{}); // struct std::vector> children; diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index de6eefb989a..64ab3d137d6 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -453,7 +453,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) } else { return row * 2; } - }); + }); auto valid_sequence1 = cudf::detail::make_counting_transform_iterator( 0, [inputRows](auto row) { return (row < inputRows - 1); }); cudf::test::fixed_width_column_wrapper @@ -698,7 +698,7 @@ TEST_F(MergeTest, KeysWithNulls) cudf::size_type nrows = 13200; // Ensures that thrust::merge uses more than one tile/block auto data_iter = thrust::make_counting_iterator(0); auto valids1 = cudf::detail::make_counting_transform_iterator( - 0, [](auto row) { return (row % 10 == 0) ? false : true; }); + 0, [](auto row) { return (row % 10 == 0) ? false : true; }); cudf::test::fixed_width_column_wrapper data1(data_iter, data_iter + nrows, valids1); auto valids2 = cudf::detail::make_counting_transform_iterator( 0, [](auto row) { return (row % 15 == 0) ? false : true; }); diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index a1ac942cdab..6f292ac6d98 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1390,7 +1390,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionQuantile) for (auto const i : {0, 1, 2, 3, 4}) { auto const expected = decimalXX{scaled_integer{i + 1, scale}}; auto const result = cudf::reduce( - column, cudf::make_quantile_aggregation({i / 4.0}, cudf::interpolation::LINEAR), out_type); + column, cudf::make_quantile_aggregation({i / 4.0}, cudf::interpolation::LINEAR), out_type); auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1413,7 +1413,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionNthElement) for (auto const i : {0, 1, 2, 3}) { auto const expected = decimalXX{scaled_integer{values[i], scale}}; auto const result = cudf::reduce( - column, cudf::make_nth_element_aggregation(i, cudf::null_policy::INCLUDE), out_type); + column, cudf::make_nth_element_aggregation(i, cudf::null_policy::INCLUDE), out_type); auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index aedc76c879c..87b329b36c1 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -410,7 +410,7 @@ TEST_F(ScanStringsTest, MoreStringsMinMax) return std::string(s); }); auto validity = cudf::detail::make_counting_transform_iterator( - 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); strings_column_wrapper col(data_begin, data_begin + row_count, validity); thrust::host_vector v(data_begin, data_begin + row_count); diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 2b6883a080d..3ceaba2637b 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -193,19 +193,19 @@ TEST_F(StringsConvertTest, IsFixedPoint) "170141183460469231731687303715884105727", "170141183460469231731687303715884105728"}); results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL32}); + cudf::data_type{cudf::type_id::DECIMAL32}); auto const expected32 = cudf::test::fixed_width_column_wrapper( {true, true, false, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32); results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL64}); + cudf::data_type{cudf::type_id::DECIMAL64}); auto const expected64 = cudf::test::fixed_width_column_wrapper( {true, true, true, true, true, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64); results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), - cudf::data_type{cudf::type_id::DECIMAL128}); + cudf::data_type{cudf::type_id::DECIMAL128}); auto const expected128 = cudf::test::fixed_width_column_wrapper( {true, true, true, true, true, true, true, false, true, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected128); diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 73f4e1cb465..44a5ad44cea 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -489,10 +489,10 @@ TEST_F(RowBitCount, NestedTypes) l4_offsets.end()); auto const l4_size = l4_offsets.size() - 1; auto l4 = cudf::make_lists_column(static_cast(l4_size), - l4_offsets_col.release(), - innermost_struct.release(), - cudf::UNKNOWN_NULL_COUNT, - rmm::device_buffer{}); + l4_offsets_col.release(), + innermost_struct.release(), + cudf::UNKNOWN_NULL_COUNT, + rmm::device_buffer{}); // inner struct std::vector> inner_struct_children; From 4ad26f4fee0fd615943c243906741141defad3be Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 00:31:39 -0400 Subject: [PATCH 064/112] Cleanup --- cpp/include/cudf_test/column_wrapper.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index f7ec3738b90..ccfb9d24a7d 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -511,9 +511,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(begin, end); - auto const id = std::is_same_v ? type_id::DECIMAL32 - : std::is_same_v ? type_id::DECIMAL64 - : type_id::DECIMAL128; + auto const id = type_to_id >(); auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ @@ -577,9 +575,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(begin, end); - auto const id = std::is_same_v ? type_id::DECIMAL32 - : std::is_same_v ? type_id::DECIMAL64 - : type_id::DECIMAL128; + auto const id = type_to_id >(); auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ From 3892e7346fe8922d7a17654526c5a56dbdf27986 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 00:56:07 -0400 Subject: [PATCH 065/112] Cleanup --- .../cudf/column/column_device_view.cuh | 46 ++----------------- 1 file changed, 5 insertions(+), 41 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 1ab259be00a..505ff33ec72 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -421,57 +421,21 @@ class alignas(16) column_device_view : public detail::column_device_view_base { } /** - * @brief Returns a `numeric::decimal32` element at the specified index for a `fixed_point` - * column. + * @brief Returns a `numeric::fixed_point` element at the specified index for a `fixed_point` column. * * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, * then any attempt to use the result will lead to undefined behavior. * * @param element_index Position of the desired element - * @return numeric::decimal32 representing the element at this index + * @return numeric::fixed_point representing the element at this index */ - template )> + template ())> __device__ T element(size_type element_index) const noexcept { using namespace numeric; + using rep = typename T::rep; auto const scale = scale_type{_type.scale()}; - return decimal32{scaled_integer{data()[element_index], scale}}; - } - - /** - * @brief Returns a `numeric::decimal64` element at the specified index for a `fixed_point` - * column. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. - * - * @param element_index Position of the desired element - * @return numeric::decimal64 representing the element at this index - */ - template )> - __device__ T element(size_type element_index) const noexcept - { - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal64{scaled_integer{data()[element_index], scale}}; - } - - /** - * @brief Returns a `numeric::decimal128` element at the specified index for a `fixed_point` - * column. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. - * - * @param element_index Position of the desired element - * @return numeric::decimal128 representing the element at this index - */ - template )> - __device__ T element(size_type element_index) const noexcept - { - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal128{scaled_integer<__int128_t>{data<__int128_t>()[element_index], scale}}; + return T{scaled_integer{data()[element_index], scale}}; } /** From 8e9bd9020af53bab56d32ec780320530d607dac4 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 00:57:43 -0400 Subject: [PATCH 066/112] Missing clang-format --- cpp/include/cudf/column/column_device_view.cuh | 5 +++-- cpp/include/cudf_test/column_wrapper.hpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 505ff33ec72..6ecb0796283 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -421,7 +421,8 @@ class alignas(16) column_device_view : public detail::column_device_view_base { } /** - * @brief Returns a `numeric::fixed_point` element at the specified index for a `fixed_point` column. + * @brief Returns a `numeric::fixed_point` element at the specified index for a `fixed_point` + * column. * * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, * then any attempt to use the result will lead to undefined behavior. @@ -433,7 +434,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { __device__ T element(size_type element_index) const noexcept { using namespace numeric; - using rep = typename T::rep; + using rep = typename T::rep; auto const scale = scale_type{_type.scale()}; return T{scaled_integer{data()[element_index], scale}}; } diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index ccfb9d24a7d..c228bea9257 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -511,7 +511,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(begin, end); - auto const id = type_to_id >(); + auto const id = type_to_id>(); auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ @@ -575,7 +575,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(begin, end); - auto const id = type_to_id >(); + auto const id = type_to_id>(); auto const data_type = cudf::data_type{id, static_cast(scale)}; wrapped.reset(new cudf::column{ From 41cc23a3a1e0119dbb8c9cd7444f35b227ae8156 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 02:20:24 -0400 Subject: [PATCH 067/112] digits10 --- cpp/src/io/orc/writer_impl.cu | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 011053aefb9..866be896012 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -43,6 +43,8 @@ #include #include +#include + namespace cudf { namespace io { namespace detail { @@ -124,10 +126,11 @@ constexpr int32_t to_clockscale(cudf::type_id timestamp_id) */ constexpr auto orc_precision(cudf::type_id decimal_id) { + using namespace numeric; switch (decimal_id) { - case cudf::type_id::DECIMAL32: return 9; - case cudf::type_id::DECIMAL64: return 18; - case cudf::type_id::DECIMAL128: return 38; + case cudf::type_id::DECIMAL32: return cuda::std::numeric_limits::digits10; + case cudf::type_id::DECIMAL64: return cuda::std::numeric_limits::digits10; + case cudf::type_id::DECIMAL128: return cuda::std::numeric_limits::digits10; default: return 0; } } From 921ff12d58251c8db963cd56fa71e24cb3c716c9 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 09:36:06 -0400 Subject: [PATCH 068/112] Clean up --- cpp/include/cudf/detail/copy_if.cuh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 587bf3abf24..fb4c636fcb0 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -217,17 +217,7 @@ struct DeviceType()>> { }; template -struct DeviceType>> { - using type = typename cudf::device_storage_type_t; -}; - -template -struct DeviceType>> { - using type = typename cudf::device_storage_type_t; -}; - -template -struct DeviceType>> { +struct DeviceType()>> { using type = typename cudf::device_storage_type_t; }; From a5e4187f1e5e160eeeb075d684844ece22b664e7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 15:55:08 -0400 Subject: [PATCH 069/112] IO changes --- cpp/src/io/orc/reader_impl.cu | 10 +++------- cpp/src/io/orc/stripe_data.cu | 2 +- cpp/src/io/orc/stripe_enc.cu | 9 ++++----- cpp/src/io/orc/writer_impl.cu | 6 +++--- cpp/src/io/parquet/writer_impl.cu | 4 +--- 5 files changed, 12 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d1c8c3661f4..e49cf718740 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -81,7 +81,7 @@ constexpr type_id to_type_id(const orc::SchemaType& schema, case orc::DATE: // There isn't a (DAYS -> np.dtype) mapping return (use_np_dtypes) ? type_id::TIMESTAMP_MILLISECONDS : type_id::TIMESTAMP_DAYS; - case orc::DECIMAL: return (decimals_as_float64) ? type_id::FLOAT64 : type_id::DECIMAL64; + case orc::DECIMAL: return (decimals_as_float64) ? type_id::FLOAT64 : type_id::DECIMAL128; // Need to update once cuDF plans to support map type case orc::MAP: case orc::LIST: return type_id::LIST; @@ -1074,7 +1074,7 @@ std::unique_ptr reader::impl::create_empty_column(const int32_t orc_col_ break; case orc::DECIMAL: - if (type == type_id::DECIMAL64) { + if (type == type_id::DECIMAL128) { scale = -static_cast(_metadata->get_types()[orc_col_id].scale.value_or(0)); } out_col = make_empty_column(data_type(type, scale)); @@ -1215,11 +1215,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto col_type = to_type_id( _metadata->get_col_type(col.id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - // Remove this once we support Decimal128 data type - CUDF_EXPECTS( - (col_type != type_id::DECIMAL64) or (_metadata->get_col_type(col.id).precision <= 18), - "Decimal data has precision > 18, Decimal64 data type doesn't support it."); - if (col_type == type_id::DECIMAL64) { + if (col_type == type_id::DECIMAL128) { // sign of the scale is changed since cuDF follows c++ libraries like CNL // which uses negative scaling, but liborc and other libraries // follow positive scaling. diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index bcbe77d9df8..5993d12fa6f 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1722,7 +1722,7 @@ __global__ void __launch_bounds__(block_size) case DOUBLE: case LONG: case DECIMAL: - static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; + static_cast<__uint128_t*>(data_out)[row] = s->vals.u64[t + vals_skipped]; break; case MAP: case LIST: { diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 3e313a7399f..ccfa42deea7 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -116,9 +116,8 @@ static inline __device__ uint64_t zigzag(int64_t v) return ((v ^ -s) * 2) + s; } -static inline __device__ uint64_t zigzag(__int128_t v) +static inline __device__ __uint128_t zigzag(__int128_t v) { - // TODO int64_t s = (v < 0) ? 1 : 0; return ((v ^ -s) * 2) + s; } @@ -285,11 +284,11 @@ static const __device__ __constant__ uint8_t kByteLengthToRLEv2_W[9] = { /** * @brief Encode a varint value, return the number of bytes written */ -static inline __device__ uint32_t StoreVarint(uint8_t* dst, uint64_t v) +static inline __device__ uint32_t StoreVarint(uint8_t* dst, __uint128_t v) { uint32_t bytecnt = 0; for (;;) { - uint32_t c = (uint32_t)(v & 0x7f); + auto c = static_cast(v & 0x7f); v >>= 7u; if (v == 0) { dst[bytecnt++] = c; @@ -950,7 +949,7 @@ __global__ void __launch_bounds__(block_size) case DECIMAL: { if (is_value_valid) { auto const id = column.type().id(); - uint64_t const zz_val = + __uint128_t const zz_val = id == type_id::DECIMAL32 ? zigzag(column.element(row)) : id == type_id::DECIMAL64 ? zigzag(column.element(row)) : zigzag(column.element<__int128_t>(row)); diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 866be896012..67c67f4e432 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1636,13 +1636,13 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, if (col.is_null(idx) or not bit_value_or(pushdown_mask, idx, true)) return 0u; - int64_t const element = + __int128_t const element = col.type().id() == type_id::DECIMAL32 ? col.element(idx) : col.type().id() == type_id::DECIMAL64 ? col.element(idx) : col.element<__int128_t>(idx); - int64_t const sign = (element < 0) ? 1 : 0; - uint64_t zigzaged_value = ((element ^ -sign) * 2) + sign; + __int128_t const sign = (element < 0) ? 1 : 0; + __uint128_t zigzaged_value = ((element ^ -sign) * 2) + sign; uint32_t encoded_length = 1; while (zigzaged_value > 127) { diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index d4a49cb6f6d..de9af2282c0 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -342,9 +342,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_decimal64; } else if (std::is_same_v) { - // TODO - // col_schema.type = Type::INT64; - // col_schema.stats_dtype = statistics_dtype::dtype_decimal64; + CUDF_FAIL("decimal128 currently not supported for parquet writer"); } else { CUDF_FAIL("Unsupported fixed point type for parquet writer"); } From d87c9d4acc73842627989162e0757a816f3cbc4f Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 22 Oct 2021 17:08:51 -0400 Subject: [PATCH 070/112] Fix and partial test updates --- cpp/src/io/orc/stripe_data.cu | 2 ++ cpp/tests/io/orc_test.cpp | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 5993d12fa6f..300bf889f90 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1721,6 +1721,8 @@ __global__ void __launch_bounds__(block_size) case INT: static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; break; case DOUBLE: case LONG: + static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; + break; case DECIMAL: static_cast<__uint128_t*>(data_out)[row] = s->vals.u64[t + vals_skipped]; break; diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index f2d5952d0ed..c87c15db664 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -341,9 +341,9 @@ TEST_F(OrcWriterTest, MultiColumn) auto col3_data = random_values(num_rows); auto col4_data = random_values(num_rows); auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); + auto col6_vals = random_values(num_rows); auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{col6_vals[i], numeric::scale_type{2}}; + return numeric::decimal128{col6_vals[i], numeric::scale_type{2}}; }); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); @@ -353,7 +353,7 @@ TEST_F(OrcWriterTest, MultiColumn) column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; - column_wrapper col6{col6_data, col6_data + num_rows, validity}; + column_wrapper col6{col6_data, col6_data + num_rows, validity}; cudf::test::lists_column_wrapper col7{ {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}; @@ -401,7 +401,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) auto col5_data = random_values(num_rows); auto col6_vals = random_values(num_rows); auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{col6_vals[i], numeric::scale_type{2}}; + return numeric::decimal128{col6_vals[i], numeric::scale_type{2}}; }); auto col0_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); @@ -423,7 +423,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; - column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; + column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; cudf::test::lists_column_wrapper col7{ {{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}, col0_mask}; From 3b9a61175d406c88311e836aa8109e2d15050838 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 25 Oct 2021 14:20:32 -0400 Subject: [PATCH 071/112] Clean up --- cpp/include/cudf/fixed_point/fixed_point.hpp | 3 ++- cpp/src/io/orc/stripe_data.cu | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 8178aecd83d..b356d857f32 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -49,7 +49,8 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 }; template constexpr inline auto is_supported_representation_type() { - return cuda::std::is_same_v || cuda::std::is_same_v || + return cuda::std::is_same_v || // + cuda::std::is_same_v || // cuda::std::is_same_v; } diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 300bf889f90..a3cb1581266 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1720,9 +1720,7 @@ __global__ void __launch_bounds__(block_size) case FLOAT: case INT: static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; break; case DOUBLE: - case LONG: - static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; - break; + case LONG: static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; break; case DECIMAL: static_cast<__uint128_t*>(data_out)[row] = s->vals.u64[t + vals_skipped]; break; From 5bab167b132177b5ebeff383bd318794c089edd3 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 25 Oct 2021 15:48:37 -0400 Subject: [PATCH 072/112] Update libcudacxx --- cpp/cmake/thirdparty/get_libcudacxx.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake index 772e14c66da..aab75f63bf6 100644 --- a/cpp/cmake/thirdparty/get_libcudacxx.cmake +++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake @@ -16,8 +16,8 @@ function(find_and_configure_libcudacxx VERSION) rapids_cpm_find(libcudacxx ${VERSION} - GIT_REPOSITORY https://gitlab-master.nvidia.com/nvhpc/libcudacxx.git - GIT_TAG staging/1.6.0 + GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git + GIT_TAG branch/1.6.0 GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE ) From a4c03e57e4fde13cbd148cbd0a93c380641e1c7d Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 25 Oct 2021 21:23:28 -0400 Subject: [PATCH 073/112] Fixing OrcWriterTestDecimal.Decimal64 test --- cpp/tests/io/orc_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index c87c15db664..f0e83f3e634 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1138,10 +1138,10 @@ TEST_P(OrcWriterTestDecimal, Decimal64) // Using int16_t because scale causes values to overflow if they already require 32 bits auto const vals = random_values(num_rows); auto data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal64{vals[i], numeric::scale_type{scale}}; + return numeric::decimal128{vals[i], numeric::scale_type{scale}}; }); auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 7 == 0; }); - column_wrapper col{data, data + num_rows, mask}; + column_wrapper col{data, data + num_rows, mask}; cudf::table_view tbl({static_cast(col)}); auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); From 976fb743233b937ec05cfa85b0f58e893e100891 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 25 Oct 2021 21:27:31 -0400 Subject: [PATCH 074/112] Fix rest of ORC_TEST --- cpp/tests/io/orc_test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index f0e83f3e634..e3ca8824880 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1185,13 +1185,13 @@ TEST_F(OrcWriterTest, Decimal32) cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); auto result = cudf_io::read_orc(in_opts); - // Need a 64bit decimal column for comparison since the reader always creates DECIMAL64 columns - auto data64 = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { - return numeric::decimal64{vals[i], numeric::scale_type{2}}; + // Need a 128bit decimal column for comparison since the reader always creates DECIMAL128 columns + auto data128 = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { + return numeric::decimal128{vals[i], numeric::scale_type{2}}; }); - column_wrapper col64{data64, data64 + num_rows, mask}; + column_wrapper col128{data128, data128 + num_rows, mask}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(col64, result.tbl->view().column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col128, result.tbl->view().column(0)); } TEST_F(OrcStatisticsTest, Overflow) From c9c7250f5906a439c16b94e4e22ac266104ea3ef Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 26 Oct 2021 14:34:32 -0400 Subject: [PATCH 075/112] ORC changes for decimal128 --- cpp/src/io/orc/stripe_data.cu | 60 +++++++++++------------------------ 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index a3cb1581266..5b68a425cb4 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -133,6 +133,8 @@ struct orcdec_state_s { uint64_t u64[block_size]; int64_t i64[block_size]; double f64[block_size]; + __int128_t i128[block_size]; // TMP + __uint128_t u128[block_size]; // TMP } vals; }; @@ -451,29 +453,18 @@ inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int p /** * @brief Decodes a signed int128 encoded as base-128 varint (used for decimals) */ -inline __device__ int128_s decode_varint128(volatile orc_bytestream_s* bs, int pos) +inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) { - uint32_t b = bytestream_readbyte(bs, pos++); - int64_t sign_mask = -(int32_t)(b & 1); - uint64_t v = (b >> 1) & 0x3f; - uint32_t bitpos = 6; - uint64_t lo = v; - uint64_t hi = 0; + uint32_t b = bytestream_readbyte(bs, pos++); + __int128_t sign_mask = -(int32_t)(b & 1); + __int128_t v = (b >> 1) & 0x3f; + uint32_t bitpos = 6; while (b > 0x7f && bitpos < 128) { b = bytestream_readbyte(bs, pos++); v |= ((uint64_t)(b & 0x7f)) << (bitpos & 0x3f); - if (bitpos == 62) { // 6 + 7 * 8 = 62 - lo = v; - v = (b & 0x7f) >> 2; // 64 - 62 - } bitpos += 7; } - if (bitpos >= 64) { - hi = v; - } else { - lo = v; - } - return {(uint64_t)(lo ^ sign_mask), (int64_t)(hi ^ sign_mask)}; + return v ^ sign_mask; } /** @@ -1046,8 +1037,8 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, uint32_t pos = lastpos; pos += varint_length(bs, pos); if (pos > maxpos) break; - vals.i64[n] = lastpos; - lastpos = pos; + vals.i64[2 * n] = lastpos; + lastpos = pos; } scratch->num_vals = n; bytestream_flush_bytes(bs, lastpos - bs->pos); @@ -1055,11 +1046,11 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, __syncthreads(); uint32_t num_vals_to_read = scratch->num_vals; if (t >= num_vals_read and t < num_vals_to_read) { - auto const pos = static_cast(vals.i64[t]); - int128_s v = decode_varint128(bs, pos); + auto const pos = static_cast(vals.i64[2 * t]); + __int128_t v = decode_varint128(bs, pos); if (col_scale & orc_decimal2float64_scale) { - double f = Int128ToDouble_rn(v.lo, v.hi); + double f = v; int32_t scale = (t < numvals) ? val_scale : 0; if (scale >= 0) vals.f64[t] = f / kPow10[min(scale, 39)]; @@ -1071,27 +1062,12 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, // of them will be used to add 0s or remove digits. int32_t scale = (t < numvals) ? col_scale - val_scale : 0; if (scale >= 0) { - scale = min(scale, 27); - vals.i64[t] = ((int64_t)v.lo * kPow5i[scale]) << scale; + scale = min(scale, 27); + vals.i128[t] = v * kPow10[scale]; } else // if (scale < 0) { - bool is_negative = (v.hi < 0); - uint64_t hi = v.hi, lo = v.lo; - scale = min(-scale, 27); - if (is_negative) { - hi = (~hi) + (lo == 0); - lo = (~lo) + 1; - } - lo = (lo >> (uint32_t)scale) | ((uint64_t)hi << (64 - scale)); - hi >>= (int32_t)scale; - if (hi != 0) { - // Use intermediate float - lo = __double2ull_rn(Int128ToDouble_rn(lo, hi) / __ll2double_rn(kPow5i[scale])); - hi = 0; - } else { - lo /= kPow5i[scale]; - } - vals.i64[t] = (is_negative) ? -(int64_t)lo : (int64_t)lo; + scale = min(-scale, 27); // should be irrelevant + vals.i128[t] = v / kPow10[scale]; } } } @@ -1722,7 +1698,7 @@ __global__ void __launch_bounds__(block_size) case DOUBLE: case LONG: static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; break; case DECIMAL: - static_cast<__uint128_t*>(data_out)[row] = s->vals.u64[t + vals_skipped]; + static_cast<__uint128_t*>(data_out)[row] = s->vals.u128[t + vals_skipped]; break; case MAP: case LIST: { From 46bd2d86222206634487a7aa372ec18503c64e0b Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 26 Oct 2021 15:00:46 -0400 Subject: [PATCH 076/112] ORC fixes for decima128 --- cpp/src/io/orc/stripe_data.cu | 4 ++-- cpp/tests/io/orc_test.cpp | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 5b68a425cb4..3781e31cb9f 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1063,11 +1063,11 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, int32_t scale = (t < numvals) ? col_scale - val_scale : 0; if (scale >= 0) { scale = min(scale, 27); - vals.i128[t] = v * kPow10[scale]; + vals.i128[t] = (v * kPow5i[scale]) << scale; } else // if (scale < 0) { scale = min(-scale, 27); // should be irrelevant - vals.i128[t] = v / kPow10[scale]; + vals.i128[t] = (v / kPow5i[scale]) >> scale; } } } diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index e3ca8824880..8836e57a932 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -345,6 +345,9 @@ TEST_F(OrcWriterTest, MultiColumn) auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return numeric::decimal128{col6_vals[i], numeric::scale_type{2}}; }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{col6_vals[i], numeric::scale_type{-2}}; + }); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); column_wrapper col0{col0_data.begin(), col0_data.end(), validity}; @@ -354,15 +357,16 @@ TEST_F(OrcWriterTest, MultiColumn) column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; column_wrapper col6{col6_data, col6_data + num_rows, validity}; + column_wrapper col7{col7_data, col7_data + num_rows, validity}; - cudf::test::lists_column_wrapper col7{ + cudf::test::lists_column_wrapper col8{ {9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}; auto child_col = cudf::test::fixed_width_column_wrapper{48, 27, 25, 31, 351, 351, 29, 15, -1, -99}; - auto col8 = cudf::test::structs_column_wrapper{child_col}; + auto col9 = cudf::test::structs_column_wrapper{child_col}; - table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8}); + table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9}); cudf_io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("bools"); @@ -371,9 +375,10 @@ TEST_F(OrcWriterTest, MultiColumn) expected_metadata.column_metadata[3].set_name("int32s"); expected_metadata.column_metadata[4].set_name("floats"); expected_metadata.column_metadata[5].set_name("doubles"); - expected_metadata.column_metadata[6].set_name("decimal"); - expected_metadata.column_metadata[7].set_name("lists"); - expected_metadata.column_metadata[8].set_name("structs"); + expected_metadata.column_metadata[6].set_name("decimal_pos_scale"); + expected_metadata.column_metadata[7].set_name("decimal_neg_scale"); + expected_metadata.column_metadata[8].set_name("lists"); + expected_metadata.column_metadata[9].set_name("structs"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); cudf_io::orc_writer_options out_opts = From 8a86d76a3d714e82e7130762640cb4b098ce1a44 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 27 Oct 2021 11:22:06 -0400 Subject: [PATCH 077/112] Binary op changes / GROUPBY_TEST working --- cpp/src/binaryop/binaryop.cpp | 16 +- cpp/tests/binaryop/binop-compiled-test.cpp | 156 ++++++++++-------- cpp/tests/binaryop/binop-integration-test.cpp | 3 - 3 files changed, 95 insertions(+), 80 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index c09962219a9..9b940472080 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -273,19 +273,13 @@ void fixed_point_binary_operation_validation(binary_operator op, Rhs rhs, thrust::optional output_type = {}) { - CUDF_EXPECTS(is_fixed_point(lhs), "Input must have fixed_point data_type."); - CUDF_EXPECTS(is_fixed_point(rhs), "Input must have fixed_point data_type."); + CUDF_EXPECTS((is_fixed_point(lhs) or is_fixed_point(rhs)), + "One of the inputs must have fixed_point data_type."); CUDF_EXPECTS(binops::is_supported_fixed_point_binop(op), "Unsupported fixed_point binary operation"); - CUDF_EXPECTS(lhs.id() == rhs.id(), "Data type mismatch"); - if (output_type.has_value()) { - if (binops::is_comparison_binop(op)) - CUDF_EXPECTS(output_type == cudf::data_type{type_id::BOOL8}, - "Comparison operations require boolean output type."); - else - CUDF_EXPECTS(is_fixed_point(output_type.value()), - "fixed_point binary operations require fixed_point output type."); - } + if (output_type.has_value() and binops::is_comparison_binop(op)) + CUDF_EXPECTS(output_type == cudf::data_type{type_id::BOOL8}, + "Comparison operations require boolean output type."); } /** diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 206b0252abf..52708829502 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -107,22 +107,27 @@ struct BinaryOperationCompiledTest : public BinaryOperationTest { // t t + d // d d + t d + d -using Add_types = cudf::test::Types< - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - // cudf::test::Types, //valid - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - // Extras - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using namespace numeric; + +using Add_types = cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // cudf::test::Types, //valid + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // Extras + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; + template struct BinaryOperationCompiledTest_Add : public BinaryOperationCompiledTest { }; @@ -139,15 +144,19 @@ TYPED_TEST(BinaryOperationCompiledTest_Add, Vector_Vector) // t t - t t - d // d d - d -using Sub_types = cudf::test::Types< - cudf::test::Types, // n - n - cudf::test::Types, // t - t - cudf::test::Types, // t - d - cudf::test::Types, // d - d - cudf::test::Types, // d - d - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Sub_types = + cudf::test::Types, // n - n + cudf::test::Types, // t - t + cudf::test::Types, // t - d + cudf::test::Types, // d - d + cudf::test::Types, // d - d + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; + template struct BinaryOperationCompiledTest_Sub : public BinaryOperationCompiledTest { }; @@ -163,14 +172,20 @@ TYPED_TEST(BinaryOperationCompiledTest_Sub, Vector_Vector) // n n * n n * d // t // d d * n -using Mul_types = cudf::test::Types< - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Mul_types = cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; + template struct BinaryOperationCompiledTest_Mul : public BinaryOperationCompiledTest { }; @@ -186,17 +201,20 @@ TYPED_TEST(BinaryOperationCompiledTest_Mul, Vector_Vector) // n n / n // t // d d / n d / d -using Div_types = cudf::test::Types< - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Div_types = cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; + template struct BinaryOperationCompiledTest_Div : public BinaryOperationCompiledTest { }; @@ -216,6 +234,7 @@ using TrueDiv_types = cudf::test::Types, cudf::test::Types, cudf::test::Types>; + template struct BinaryOperationCompiledTest_TrueDiv : public BinaryOperationCompiledTest { }; @@ -458,18 +477,17 @@ TYPED_TEST(BinaryOperationCompiledTest_Logical, LogicalOr_Vector_Vector) // Comparison Operations ==, !=, <, >, <=, >= // nn, tt, dd, ss, dcdc -using Comparison_types = - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Comparison_types = cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_Comparison : public BinaryOperationCompiledTest { @@ -514,16 +532,22 @@ TYPED_TEST(BinaryOperationCompiledTest_Comparison, GreaterEqual_Vector_Vector) // d . // s . // dc . . -using Null_types = cudf::test::Types< - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - cudf::test::Types, - // cudf::test::Types, // only fixed-width - cudf::test::Types, - cudf::test::Types, - cudf::test::Types>; +using Null_types = + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + // cudf::test::Types, // only fixed-width + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types, + cudf::test::Types>; template struct BinaryOperationCompiledTest_NullOps : public BinaryOperationCompiledTest { diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index fa3d9d13f0d..f47a618fbe0 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -2665,11 +2665,8 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; auto const non_bool_type = data_type{type_to_id(), -2}; - auto const float_type = data_type{type_id::FLOAT32}; EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), cudf::logic_error); - EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::MUL, float_type), - cudf::logic_error); } template From e54d3fa045412ca7f430f7cc68d867932a9ee330 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 28 Oct 2021 16:17:15 -0400 Subject: [PATCH 078/112] Test for blog --- cpp/tests/round/round_tests.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index 4d1f66443c2..5a2e1353fb0 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -284,6 +284,20 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegEven3) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TYPED_TEST(RoundTestsFixedPointTypes, TestForBlog) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const input = fp_wrapper{{25649999}, scale_type{-5}}; + auto const expected = fp_wrapper{{256}, scale_type{0}}; + auto const result = cudf::round(input); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(RoundTestsFloatingPointTypes, SimpleFloatingPointTestHalfUp0) { using fw_wrapper = cudf::test::fixed_width_column_wrapper; From 92694b8ec63686f5f2b6d227defe0046b131a0a7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 1 Nov 2021 15:53:39 -0400 Subject: [PATCH 079/112] Merge conflict fix --- cpp/src/io/orc/reader_impl.cu | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 4c57aa71a13..a3c108421ef 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -944,15 +944,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto col_type = to_type_id( _metadata.get_col_type(col.id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); -<<<<<<< HEAD if (col_type == type_id::DECIMAL128) { -======= - // Remove this once we support Decimal128 data type - CUDF_EXPECTS( - (col_type != type_id::DECIMAL64) or (_metadata.get_col_type(col.id).precision <= 18), - "Decimal data has precision > 18, Decimal64 data type doesn't support it."); - if (col_type == type_id::DECIMAL64) { ->>>>>>> branch-21.12 // sign of the scale is changed since cuDF follows c++ libraries like CNL // which uses negative scaling, but liborc and other libraries // follow positive scaling. From 44d05733dafdc534a6d3d40dd48993ade4b1c48b Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 2 Nov 2021 13:24:12 -0400 Subject: [PATCH 080/112] Temporary fix --- cpp/cmake/libcudacxx.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/libcudacxx.patch b/cpp/cmake/libcudacxx.patch index ef11688311b..3cdc40ef084 100644 --- a/cpp/cmake/libcudacxx.patch +++ b/cpp/cmake/libcudacxx.patch @@ -7,7 +7,7 @@ index d55a43688..654142d7e 100644 #define _LIBCUDACXX_CUDACC_VER_BUILD __CUDACC_VER_BUILD__ #define _LIBCUDACXX_CUDACC_VER \ - _LIBCUDACXX_CUDACC_VER_MAJOR * 10000 + _LIBCUDACXX_CUDACC_VER_MINOR * 100 + \ -+ _LIBCUDACXX_CUDACC_VER_MAJOR * 10000 + _LIBCUDACXX_CUDACC_VER_MINOR * 1000 + \ ++ _LIBCUDACXX_CUDACC_VER_MAJOR * 100000 + _LIBCUDACXX_CUDACC_VER_MINOR * 1000 + \ _LIBCUDACXX_CUDACC_VER_BUILD #define _LIBCUDACXX_HAS_NO_LONG_DOUBLE From 99a82ee5405f98535c254a5c81e81977c100f626 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 2 Nov 2021 14:10:08 -0400 Subject: [PATCH 081/112] Update CONTRIBUTING.md --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f83d7c5b759..aae62fbd47c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,12 +62,12 @@ The following instructions are for developers and contributors to cuDF OSS devel Compilers: * `gcc` version 9.3+ -* `nvcc` version 11.0+ +* `nvcc` version 11.5+ * `cmake` version 3.20.1+ CUDA/GPU: -* CUDA 11.0+ +* CUDA 11.5+ * NVIDIA driver 450.80.02+ * Pascal architecture or better From 99ad08bfe1ec58fa437ff00ff95ea6211f991d22 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 2 Nov 2021 16:24:42 -0400 Subject: [PATCH 082/112] Temporary --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 6764c78ed87..ecf02c22885 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -22,8 +22,8 @@ function(find_and_configure_cucollections) rapids_cpm_find(cuco 0.0 GLOBAL_TARGETS cuco::cuco CPM_ARGS - GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 62b90b7f7adf272455007b1c857e1d621aaf13ca + GITHUB_REPOSITORY robertmaynard/cuCollections + GIT_TAG bf6a90db78516e099d07e845a39012dbcaa8de18 OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" From 95a24020c1200c6a48f95620debfc3ff6702abc1 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 2 Nov 2021 21:35:42 -0400 Subject: [PATCH 083/112] Sum Aggregation uses same type for accumulator --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 13 ++----------- cpp/tests/groupby/sum_scan_tests.cpp | 6 ++---- cpp/tests/groupby/sum_tests.cpp | 12 ++++-------- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index a10ffcffcfe..c2bd7a4893c 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1118,17 +1118,8 @@ template struct target_type_impl< Source, k, - std::enable_if_t() && - not std::is_same_v && (k == aggregation::SUM)>> { - using type = numeric::decimal64; -}; - -template -struct target_type_impl< - Source, - k, - std::enable_if_t && (k == aggregation::SUM)>> { - using type = numeric::decimal128; + std::enable_if_t() && (k == aggregation::SUM)>> { + using type = Source; }; // Summing/Multiplying float/doubles, use same type accumulator diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp index 6b813f8b6db..eab73c01dd9 100644 --- a/cpp/tests/groupby/sum_scan_tests.cpp +++ b/cpp/tests/groupby/sum_scan_tests.cpp @@ -144,8 +144,6 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) using decimalXX = TypeParam; using RepType = cudf::device_storage_type_t; using fp_wrapper = fixed_point_column_wrapper; - using SumType = std::conditional_t, __int128_t, int64_t>; - using out_fp_wrapper = fixed_point_column_wrapper; for (auto const i : {2, 1, 0, -1, -2}) { auto const scale = scale_type{i}; @@ -153,8 +151,8 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumScanDecimalAsValue) auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; auto const vals = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, scale}; - auto const expect_keys = key_wrapper {1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; - auto const expect_vals_sum = out_fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale}; + auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + auto const expect_vals_sum = fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale}; // clang-format on auto agg2 = cudf::make_sum_aggregation(); diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index ed42386b694..b12372c1e08 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -168,9 +168,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) using decimalXX = TypeParam; using RepType = cudf::device_storage_type_t; using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using SumType = std::conditional_t, __int128_t, int64_t>; - using fp64_wrapper = cudf::test::fixed_point_column_wrapper; - using K = int32_t; + using K = int32_t; for (auto const i : {2, 1, 0, -1, -2}) { auto const scale = scale_type{i}; @@ -180,7 +178,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupBySortSumDecimalAsValue) // clang-format on auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; - auto const expect_vals_sum = fp64_wrapper{{9, 19, 17}, scale}; + auto const expect_vals_sum = fp_wrapper{{9, 19, 17}, scale}; auto agg1 = cudf::make_sum_aggregation(); test_single_agg( @@ -199,9 +197,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupByHashSumDecimalAsValue) using decimalXX = TypeParam; using RepType = cudf::device_storage_type_t; using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using SumType = std::conditional_t, __int128_t, int64_t>; - using fp64_wrapper = cudf::test::fixed_point_column_wrapper; - using K = int32_t; + using K = int32_t; for (auto const i : {2, 1, 0, -1, -2}) { auto const scale = scale_type{i}; @@ -211,7 +207,7 @@ TYPED_TEST(FixedPointTestAllReps, GroupByHashSumDecimalAsValue) // clang-format on auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; - auto const expect_vals_sum = fp64_wrapper{{9, 19, 17}, scale}; + auto const expect_vals_sum = fp_wrapper{{9, 19, 17}, scale}; auto agg5 = cudf::make_sum_aggregation(); test_single_agg(keys, vals, expect_keys, expect_vals_sum, std::move(agg5)); From 5ecd793519815cece2706a06f83f6a1c69596d4b Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 3 Nov 2021 10:14:21 -0400 Subject: [PATCH 084/112] ORC changes --- cpp/src/io/orc/stripe_data.cu | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 3781e31cb9f..5cd569f0d0a 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -45,11 +45,6 @@ inline __device__ uint8_t is_dictionary(uint8_t encoding_mode) { return encoding static __device__ __constant__ int64_t kORCTimeToUTC = 1420070400; // Seconds from January 1st, 1970 to January 1st, 2015 -struct int128_s { - uint64_t lo; - int64_t hi; -}; - struct orc_bytestream_s { const uint8_t* base; uint32_t pos; @@ -1022,6 +1017,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, volatile orcdec_state_s::values& vals, int val_scale, int numvals, + TypeKind dtype_kind, int col_scale, int t) { @@ -1049,7 +1045,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, auto const pos = static_cast(vals.i64[2 * t]); __int128_t v = decode_varint128(bs, pos); - if (col_scale & orc_decimal2float64_scale) { + if (dtype_kind == DOUBLE) { double f = v; int32_t scale = (t < numvals) ? val_scale : 0; if (scale >= 0) @@ -1066,7 +1062,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, vals.i128[t] = (v * kPow5i[scale]) << scale; } else // if (scale < 0) { - scale = min(-scale, 27); // should be irrelevant + scale = min(-scale, 27); vals.i128[t] = (v / kPow5i[scale]) >> scale; } } @@ -1629,8 +1625,14 @@ __global__ void __launch_bounds__(block_size) } val_scale = (t < numvals) ? (int)s->vals.i64[skip + t] : 0; __syncthreads(); - numvals = Decode_Decimals( - &s->bs, &s->u.rle8, s->vals, val_scale, numvals, s->chunk.decimal_scale, t); + numvals = Decode_Decimals(&s->bs, + &s->u.rle8, + s->vals, + val_scale, + numvals, + s->chunk.type_kind, + s->chunk.decimal_scale, + t); } __syncthreads(); } else if (s->chunk.type_kind == FLOAT) { From f55e0508f4d6b370d60827bb1f2b91f4cd2d1248 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 3 Nov 2021 15:14:56 -0400 Subject: [PATCH 085/112] Full ORC fix --- cpp/src/io/orc/orc_gpu.h | 6 +----- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/stripe_data.cu | 32 ++++++++++++++++---------------- cpp/tests/io/orc_test.cpp | 4 ++-- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index f6a7c3f5f03..ad4450bc6a7 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -84,11 +84,6 @@ struct DictionaryEntry { uint32_t len; // Length in data stream }; -/** - * @brief Mask to indicate conversion from decimals to float64 - */ -constexpr int orc_decimal2float64_scale = 0x80; - /** * @brief Struct to describe per stripe's column information */ @@ -111,6 +106,7 @@ struct ColumnDesc { ColumnEncodingKind encoding_kind; // column encoding kind TypeKind type_kind; // column data type uint8_t dtype_len; // data type length (for types that can be mapped to different sizes) + type_id dtype_id; // TODO int32_t decimal_scale; // number of fractional decimal digits for decimal type type_id timestamp_type_id; // output timestamp type id (type_id::EMPTY by default) column_validity_info parent_validity_info; // consists of parent column valid_map and null count diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index a3c108421ef..23cd8a0936b 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1114,10 +1114,10 @@ table_with_metadata reader::impl::read(size_type skip_rows, chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; auto const decimal_as_float64 = should_convert_decimal_column_to_float( _decimal_cols_as_float, _metadata.per_file_metadata[0], columns_level[col_idx].id); + chunk.dtype_id = column_types[col_idx].id(); chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] .ff.types[columns_level[col_idx].id] - .scale.value_or(0) | - (decimal_as_float64 ? orc::gpu::orc_decimal2float64_scale : 0); + .scale.value_or(0); chunk.rowgroup_id = rowgroup_id; chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 5cd569f0d0a..652c86364bd 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -448,19 +448,19 @@ inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int p /** * @brief Decodes a signed int128 encoded as base-128 varint (used for decimals) */ -inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) -{ - uint32_t b = bytestream_readbyte(bs, pos++); - __int128_t sign_mask = -(int32_t)(b & 1); - __int128_t v = (b >> 1) & 0x3f; - uint32_t bitpos = 6; - while (b > 0x7f && bitpos < 128) { - b = bytestream_readbyte(bs, pos++); - v |= ((uint64_t)(b & 0x7f)) << (bitpos & 0x3f); - bitpos += 7; - } - return v ^ sign_mask; -} + inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) + { + auto byte = bytestream_readbyte(bs, pos++); + __int128_t const sign_mask = -(int32_t)(byte & 1); + __int128_t value = (byte >> 1) & 0x3f; + uint32_t bitpos = 6; + while (byte & 0x80 && bitpos < 128) { + byte = bytestream_readbyte(bs, pos++); + value |= ((__uint128_t)(byte & 0x7f)) << bitpos; + bitpos += 7; + } + return value ^ sign_mask; + } /** * @brief Decodes an unsigned 32-bit varint @@ -1017,7 +1017,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, volatile orcdec_state_s::values& vals, int val_scale, int numvals, - TypeKind dtype_kind, + type_id dtype_id, int col_scale, int t) { @@ -1045,7 +1045,7 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, auto const pos = static_cast(vals.i64[2 * t]); __int128_t v = decode_varint128(bs, pos); - if (dtype_kind == DOUBLE) { + if (dtype_id == type_id::FLOAT64) { double f = v; int32_t scale = (t < numvals) ? val_scale : 0; if (scale >= 0) @@ -1630,7 +1630,7 @@ __global__ void __launch_bounds__(block_size) s->vals, val_scale, numvals, - s->chunk.type_kind, + s->chunk.dtype_id, s->chunk.decimal_scale, t); } diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 7e02e66d090..b29138f262c 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -343,10 +343,10 @@ TEST_F(OrcWriterTest, MultiColumn) auto col5_data = random_values(num_rows); auto col6_vals = random_values(num_rows); auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col6_vals[i], numeric::scale_type{2}}; + return numeric::decimal128{col6_vals[i], numeric::scale_type{12}}; }); auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col6_vals[i], numeric::scale_type{-2}}; + return numeric::decimal128{col6_vals[i], numeric::scale_type{-12}}; }); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); From 216385ae9d91950a78f70e07b8cd739b092db51a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 3 Nov 2021 16:10:54 -0400 Subject: [PATCH 086/112] clang-format --- cpp/src/io/orc/stripe_data.cu | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 652c86364bd..f121e1108dc 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -448,19 +448,19 @@ inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int p /** * @brief Decodes a signed int128 encoded as base-128 varint (used for decimals) */ - inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) - { - auto byte = bytestream_readbyte(bs, pos++); - __int128_t const sign_mask = -(int32_t)(byte & 1); - __int128_t value = (byte >> 1) & 0x3f; - uint32_t bitpos = 6; - while (byte & 0x80 && bitpos < 128) { - byte = bytestream_readbyte(bs, pos++); - value |= ((__uint128_t)(byte & 0x7f)) << bitpos; - bitpos += 7; - } - return value ^ sign_mask; - } +inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) +{ + auto byte = bytestream_readbyte(bs, pos++); + __int128_t const sign_mask = -(int32_t)(byte & 1); + __int128_t value = (byte >> 1) & 0x3f; + uint32_t bitpos = 6; + while (byte & 0x80 && bitpos < 128) { + byte = bytestream_readbyte(bs, pos++); + value |= ((__uint128_t)(byte & 0x7f)) << bitpos; + bitpos += 7; + } + return value ^ sign_mask; +} /** * @brief Decodes an unsigned 32-bit varint From 7ba47c70345a2d45b07f4f3f3e6e70270f344c06 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 4 Nov 2021 13:01:01 -0400 Subject: [PATCH 087/112] Reapply temporary fix --- cpp/cmake/thirdparty/get_cucollections.cmake | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 911195e2f56..89e45809010 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -15,15 +15,16 @@ # This function finds cucollections and sets any additional necessary environment variables. function(find_and_configure_cucollections) - # Find or install cuCollections - rapids_cpm_find( - # cuCollections doesn't have a version yet - cuco 0.0 - GLOBAL_TARGETS cuco::cuco - CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 62b90b7f7adf272455007b1c857e1d621aaf13ca - OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" - ) + # Find or install cuCollections + rapids_cpm_find(cuco 0.0 + GLOBAL_TARGETS cuco::cuco + CPM_ARGS + GITHUB_REPOSITORY robertmaynard/cuCollections + GIT_TAG bf6a90db78516e099d07e845a39012dbcaa8de18 + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "BUILD_EXAMPLES OFF" + ) endfunction() find_and_configure_cucollections() From 1034057301540a4f3355ce0ae16b16c481a6bb77 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 4 Nov 2021 13:02:07 -0400 Subject: [PATCH 088/112] Perf improvement for rescale --- cpp/src/unary/cast_ops.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 8428efabbd2..e852b00796a 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -176,7 +176,7 @@ std::unique_ptr rescale(column_view input, { using namespace numeric; - if (input.type().scale() > scale) { + if (input.type().scale() >= scale) { auto const scalar = make_fixed_point_scalar(0, scale_type{scale}); auto const type = cudf::data_type{cudf::type_to_id(), scale}; return detail::binary_operation(input, *scalar, binary_operator::ADD, type, stream, mr); From d6e9ee810acc06a6da3fed9c9b1ffc538dd23fbf Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 3 Nov 2021 16:27:06 -0700 Subject: [PATCH 089/112] default to dec64;make1128 slectable;fix tests;add options test --- cpp/include/cudf/io/orc.hpp | 30 ++++++++++ cpp/src/io/orc/aggregate_orc_metadata.hpp | 4 +- cpp/src/io/orc/reader_impl.cu | 70 ++++++++++++++--------- cpp/src/io/orc/reader_impl.hpp | 1 + cpp/src/io/orc/stripe_data.cu | 38 ++++++++---- cpp/tests/io/orc_test.cpp | 57 ++++++++++++++---- python/cudf/cudf/_lib/cpp/io/orc.pxd | 4 ++ 7 files changed, 152 insertions(+), 52 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 2a95b85465b..33f0232b4d0 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -70,6 +70,9 @@ class orc_reader_options { // Columns that should be converted from Decimal to Float64 std::vector _decimal_cols_as_float; + // Columns that should be read as Decimal128 + std::vector _decimal128_columns; + friend orc_reader_options_builder; /** @@ -143,6 +146,11 @@ class orc_reader_options { return _decimal_cols_as_float; } + /** + * @brief Columns that should be read as 128-bit Decimal + */ + std::vector const& get_decimal128_columns() const { return _decimal128_columns; } + // Setters /** @@ -216,6 +224,16 @@ class orc_reader_options { { _decimal_cols_as_float = std::move(val); } + + /** + * @brief Set columns that should be read as 128-bit Decimal + * + * @param val Vector of column names. + */ + void set_decimal128_columns(std::vector val) + { + _decimal128_columns = std::move(val); + } }; class orc_reader_options_builder { @@ -332,6 +350,18 @@ class orc_reader_options_builder { return *this; } + /** + * @brief Columns that should be read as 128-bit Decimal + * + * @param val Vector of column names. + * @return this for chaining. + */ + orc_reader_options_builder& decimal128_columns(std::vector val) + { + options._decimal128_columns = std::move(val); + return *this; + } + /** * @brief move orc_reader_options member once it's built. */ diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 356d20843e8..5132906a5fc 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -86,7 +86,7 @@ class aggregate_orc_metadata { /** * @brief Returns the name of the given column from the given source. */ - auto column_name(const int source_idx, const int column_id) const + std::string const& column_name(const int source_idx, const int column_id) const { CUDF_EXPECTS(source_idx <= static_cast(per_file_metadata.size()), "Out of range source_idx provided"); @@ -98,7 +98,7 @@ class aggregate_orc_metadata { * * Full name includes ancestor columns' names. */ - auto column_path(const int source_idx, const int column_id) const + std::string const& column_path(const int source_idx, const int column_id) const { CUDF_EXPECTS(source_idx <= static_cast(per_file_metadata.size()), "Out of range source_idx provided"); diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 23cd8a0936b..d35f2db90fc 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -56,7 +56,7 @@ namespace { constexpr type_id to_type_id(const orc::SchemaType& schema, bool use_np_dtypes, type_id timestamp_type_id, - bool decimals_as_float64) + type_id decimal_type_id) { switch (schema.kind) { case orc::BOOLEAN: return type_id::BOOL8; @@ -78,7 +78,7 @@ constexpr type_id to_type_id(const orc::SchemaType& schema, case orc::DATE: // There isn't a (DAYS -> np.dtype) mapping return (use_np_dtypes) ? type_id::TIMESTAMP_MILLISECONDS : type_id::TIMESTAMP_DAYS; - case orc::DECIMAL: return (decimals_as_float64) ? type_id::FLOAT64 : type_id::DECIMAL128; + case orc::DECIMAL: return decimal_type_id; // Need to update once cuDF plans to support map type case orc::MAP: case orc::LIST: return type_id::LIST; @@ -227,15 +227,26 @@ size_t gather_stream_info(const size_t stripe_index, } /** - * @brief Determines if a column should be converted from decimal to float + * @brief Determines cuDF type of an ORC Decimal column. */ -bool should_convert_decimal_column_to_float(const std::vector& columns_to_convert, - cudf::io::orc::metadata& metadata, - int column_index) +auto decimal_column_type(const std::vector& float64_columns, + const std::vector& decimal128_columns, + cudf::io::orc::metadata& metadata, + int column_index) { - return (std::find(columns_to_convert.begin(), - columns_to_convert.end(), - metadata.column_name(column_index)) != columns_to_convert.end()); + auto const& column_name = metadata.column_name(column_index); + auto is_column_in = [&](const std::vector& cols) { + return std::find(cols.cbegin(), cols.cend(), column_name) != cols.end(); + }; + + auto const user_selected_float64 = is_column_in(float64_columns); + auto const user_selected_decimal128 = is_column_in(decimal128_columns); + CUDF_EXPECTS(not user_selected_float64 or not user_selected_decimal128, + "Both decimal128 and float64 types selected for column " + column_name); + + if (user_selected_float64) return type_id::FLOAT64; + if (user_selected_decimal128) return type_id::DECIMAL128; + return type_id::DECIMAL64; } } // namespace @@ -729,12 +740,12 @@ std::unique_ptr reader::impl::create_empty_column(const size_type orc_co rmm::cuda_stream_view stream) { schema_info.name = _metadata.column_name(0, orc_col_id); - // If the column type is orc::DECIMAL see if the user - // desires it to be converted to float64 or not - auto const decimal_as_float64 = should_convert_decimal_column_to_float( - _decimal_cols_as_float, _metadata.per_file_metadata[0], orc_col_id); - auto const type = to_type_id( - _metadata.get_schema(orc_col_id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); + auto const type = to_type_id( + _metadata.get_schema(orc_col_id), + _use_np_dtypes, + _timestamp_type.id(), + decimal_column_type( + _decimal_cols_as_float, decimal128_columns, _metadata.per_file_metadata[0], orc_col_id)); int32_t scale = 0; std::vector> child_columns; std::unique_ptr out_col = nullptr; @@ -785,7 +796,7 @@ std::unique_ptr reader::impl::create_empty_column(const size_type orc_co break; case orc::DECIMAL: - if (type == type_id::DECIMAL128) { + if (type == type_id::DECIMAL64 or type == type_id::DECIMAL128) { scale = -static_cast(_metadata.get_types()[orc_col_id].scale.value_or(0)); } out_col = make_empty_column(data_type(type, scale)); @@ -876,8 +887,9 @@ reader::impl::impl(std::vector>&& sources, // Enable or disable the conversion to numpy-compatible dtypes _use_np_dtypes = options.is_enabled_use_np_dtypes(); - // Control decimals conversion (float64 or int64 with optional scale) + // Control decimals conversion _decimal_cols_as_float = options.get_decimal_cols_as_float(); + decimal128_columns = options.get_decimal128_columns(); } timezone_table reader::impl::compute_timezone_table( @@ -937,14 +949,18 @@ table_with_metadata reader::impl::read(size_type skip_rows, // Get a list of column data types std::vector column_types; for (auto& col : columns_level) { - // If the column type is orc::DECIMAL see if the user - // desires it to be converted to float64 or not - auto const decimal_as_float64 = should_convert_decimal_column_to_float( - _decimal_cols_as_float, _metadata.per_file_metadata[0], col.id); auto col_type = to_type_id( - _metadata.get_col_type(col.id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); + _metadata.get_col_type(col.id), + _use_np_dtypes, + _timestamp_type.id(), + decimal_column_type( + _decimal_cols_as_float, decimal128_columns, _metadata.per_file_metadata[0], col.id)); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - if (col_type == type_id::DECIMAL128) { + CUDF_EXPECTS( + (col_type != type_id::DECIMAL64) or (_metadata.get_col_type(col.id).precision <= 18), + "Precision of column " + std::string{_metadata.column_name(0, col.id)} + + " is over 18, use 128-bit Decimal."); + if (col_type == type_id::DECIMAL64 or col_type == type_id::DECIMAL128) { // sign of the scale is changed since cuDF follows c++ libraries like CNL // which uses negative scaling, but liborc and other libraries // follow positive scaling. @@ -1111,11 +1127,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, .kind; // num_child_rows for a struct column will be same, for other nested types it will be // calculated. - chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; - auto const decimal_as_float64 = should_convert_decimal_column_to_float( - _decimal_cols_as_float, _metadata.per_file_metadata[0], columns_level[col_idx].id); - chunk.dtype_id = column_types[col_idx].id(); - chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] + chunk.num_child_rows = (chunk.type_kind != orc::STRUCT) ? 0 : chunk.num_rows; + chunk.dtype_id = column_types[col_idx].id(); + chunk.decimal_scale = _metadata.per_file_metadata[stripe_source_mapping.source_idx] .ff.types[columns_level[col_idx].id] .scale.value_or(0); diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index c9de2211d48..64e7cbc74e5 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -222,6 +222,7 @@ class reader::impl { bool _use_index = true; bool _use_np_dtypes = true; std::vector _decimal_cols_as_float; + std::vector decimal128_columns; data_type _timestamp_type{type_id::EMPTY}; reader_column_meta _col_meta; }; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index f121e1108dc..7496fd6facd 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1053,17 +1053,25 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs, else vals.f64[t] = f * kPow10[min(-scale, 39)]; } else { - // Since cuDF column stores just one scale, value needs to - // be adjusted to col_scale from val_scale. So the difference - // of them will be used to add 0s or remove digits. - int32_t scale = (t < numvals) ? col_scale - val_scale : 0; - if (scale >= 0) { - scale = min(scale, 27); - vals.i128[t] = (v * kPow5i[scale]) << scale; - } else // if (scale < 0) - { - scale = min(-scale, 27); - vals.i128[t] = (v / kPow5i[scale]) >> scale; + auto const scaled_value = [&]() { + // Since cuDF column stores just one scale, value needs to be adjusted to col_scale from + // val_scale. So the difference of them will be used to add 0s or remove digits. + int32_t scale = (t < numvals) ? col_scale - val_scale : 0; + if (scale >= 0) { + scale = min(scale, 27); + return (v * kPow5i[scale]) << scale; + } else // if (scale < 0) + { + scale = min(-scale, 27); + return (v / kPow5i[scale]) >> scale; + } + }(); + if (dtype_id == type_id::DECIMAL64) { + vals.i64[t] = scaled_value; + } else { + { + vals.i128[t] = scaled_value; + } } } } @@ -1700,7 +1708,13 @@ __global__ void __launch_bounds__(block_size) case DOUBLE: case LONG: static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; break; case DECIMAL: - static_cast<__uint128_t*>(data_out)[row] = s->vals.u128[t + vals_skipped]; + if (s->chunk.dtype_id == type_id::FLOAT64 or + s->chunk.dtype_id == type_id::DECIMAL64) { + static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; + } else { + // decimal128 + static_cast<__uint128_t*>(data_out)[row] = s->vals.u128[t + vals_skipped]; + } break; case MAP: case LIST: { diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index b29138f262c..77c4081dbbf 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -387,7 +387,9 @@ TEST_F(OrcWriterTest, MultiColumn) cudf_io::write_orc(out_opts); cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .use_index(false) + .decimal128_columns({"decimal_pos_scale", "decimal_neg_scale"}); auto result = cudf_io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); @@ -406,7 +408,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) auto col5_data = random_values(num_rows); auto col6_vals = random_values(num_rows); auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{col6_vals[i], numeric::scale_type{2}}; + return numeric::decimal64{col6_vals[i], numeric::scale_type{2}}; }); auto col0_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); @@ -428,7 +430,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; - column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; + column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; cudf::test::lists_column_wrapper col7{ {{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}, {}, {-1, -2}}, col0_mask}; @@ -1143,10 +1145,10 @@ TEST_P(OrcWriterTestDecimal, Decimal64) // Using int16_t because scale causes values to overflow if they already require 32 bits auto const vals = random_values(num_rows); auto data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { - return numeric::decimal128{vals[i], numeric::scale_type{scale}}; + return numeric::decimal64{vals[i], numeric::scale_type{scale}}; }); auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 7 == 0; }); - column_wrapper col{data, data + num_rows, mask}; + column_wrapper col{data, data + num_rows, mask}; cudf::table_view tbl({static_cast(col)}); auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); @@ -1190,13 +1192,12 @@ TEST_F(OrcWriterTest, Decimal32) cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); auto result = cudf_io::read_orc(in_opts); - // Need a 128bit decimal column for comparison since the reader always creates DECIMAL128 columns - auto data128 = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { - return numeric::decimal128{vals[i], numeric::scale_type{2}}; + auto data64 = cudf::detail::make_counting_transform_iterator(0, [&vals](auto i) { + return numeric::decimal64{vals[i], numeric::scale_type{2}}; }); - column_wrapper col128{data128, data128 + num_rows, mask}; + column_wrapper col64{data64, data64 + num_rows, mask}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(col128, result.tbl->view().column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col64, result.tbl->view().column(0)); } TEST_F(OrcStatisticsTest, Overflow) @@ -1412,4 +1413,40 @@ TEST_F(OrcReaderTest, NestedColumnSelection) ASSERT_EQ("field_b", result.metadata.schema_info[0].children[0].name); } +TEST_F(OrcReaderTest, DecimalOptions) +{ + constexpr auto num_rows = 10; + auto col_vals = random_values(num_rows); + auto col_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{col_vals[i], numeric::scale_type{2}}; + }); + auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 == 0; }); + + column_wrapper col{col_data, col_data + num_rows, mask}; + table_view expected({col}); + + cudf_io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("dec"); + + auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc"); + cudf_io::orc_writer_options out_opts = + cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf_io::write_orc(out_opts); + + cudf_io::orc_reader_options valid_opts = + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .decimal128_columns({"dec", "fake_name"}) + .decimal_cols_as_float({"decc", "fake_name"}); + // Should not throw + EXPECT_NO_THROW(cudf_io::read_orc(valid_opts)); + + cudf_io::orc_reader_options invalid_opts = + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .decimal128_columns({"dec", "fake_name"}) + .decimal_cols_as_float({"dec", "fake_name"}); + // Should throw, options overlap + EXPECT_THROW(cudf_io::read_orc(invalid_opts), cudf::logic_error); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd index c855f112692..f0450483345 100644 --- a/python/cudf/cudf/_lib/cpp/io/orc.pxd +++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd @@ -36,6 +36,7 @@ cdef extern from "cudf/io/orc.hpp" \ void enable_use_np_dtypes(bool val) except+ void set_timestamp_type(data_type type) except+ void set_decimal_cols_as_float(vector[string] val) except+ + void set_decimal128_columns(vector[string] val) except+ @staticmethod orc_reader_options_builder builder( @@ -57,6 +58,9 @@ cdef extern from "cudf/io/orc.hpp" \ orc_reader_options_builder& decimal_cols_as_float( vector[string] val ) except+ + orc_reader_options_builder& decimal128_columns( + vector[string] val + ) except+ orc_reader_options build() except+ From 4411d8e148faa193e0b21fa8bd3bd44c9df07af1 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 5 Nov 2021 12:55:48 -0700 Subject: [PATCH 090/112] use paths for decimal types API; iron out generated column names --- cpp/include/cudf/io/orc.hpp | 8 +++--- cpp/src/io/orc/orc.cpp | 16 ++++++++--- cpp/src/io/orc/reader_impl.cu | 6 ++--- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/tests/io/orc_test.cpp | 50 +++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 33f0232b4d0..fb1199fc166 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -139,7 +139,7 @@ class orc_reader_options { data_type get_timestamp_type() const { return _timestamp_type; } /** - * @brief Columns that should be converted from Decimal to Float64. + * @brief Fully qualified names of columns that should be converted from Decimal to Float64. */ std::vector const& get_decimal_cols_as_float() const { @@ -147,7 +147,7 @@ class orc_reader_options { } /** - * @brief Columns that should be read as 128-bit Decimal + * @brief Fully qualified names of columns that should be read as 128-bit Decimal. */ std::vector const& get_decimal128_columns() const { return _decimal128_columns; } @@ -218,7 +218,7 @@ class orc_reader_options { /** * @brief Set columns that should be converted from Decimal to Float64 * - * @param val Vector of column names. + * @param val Vector of fully qualified column names. */ void set_decimal_cols_as_float(std::vector val) { @@ -228,7 +228,7 @@ class orc_reader_options { /** * @brief Set columns that should be read as 128-bit Decimal * - * @param val Vector of column names. + * @param val Vector of fully qualified column names. */ void set_decimal128_columns(std::vector val) { diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index 89eac0c9901..44cea6169e4 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -18,6 +18,8 @@ #include "orc_field_reader.hpp" #include "orc_field_writer.hpp" +#include + #include #include @@ -472,10 +474,16 @@ void metadata::init_column_names() thrust::tabulate(column_names.begin(), column_names.end(), [&](auto col_id) { if (not column_has_parent(col_id)) return std::string{}; auto const& parent_field_names = ff.types[parent_id(col_id)].fieldNames; - // Child columns of lists don't have a name in ORC files, generate placeholder in that case - return field_index(col_id) < static_cast(parent_field_names.size()) - ? parent_field_names[field_index(col_id)] - : std::to_string(col_id); + if (field_index(col_id) < static_cast(parent_field_names.size())) { + return parent_field_names[field_index(col_id)]; + } + + // Generate names for list and map child columns + if (ff.types[parent_id(col_id)].subtypes.size() == 1) { + return std::to_string(lists_column_view::child_column_index); + } else { + return std::to_string(field_index(col_id)); + } }); column_paths.resize(get_num_columns()); diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d35f2db90fc..f0612dcb42f 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -234,15 +234,15 @@ auto decimal_column_type(const std::vector& float64_columns, cudf::io::orc::metadata& metadata, int column_index) { - auto const& column_name = metadata.column_name(column_index); + auto const& column_path = metadata.column_path(column_index); auto is_column_in = [&](const std::vector& cols) { - return std::find(cols.cbegin(), cols.cend(), column_name) != cols.end(); + return std::find(cols.cbegin(), cols.cend(), column_path) != cols.end(); }; auto const user_selected_float64 = is_column_in(float64_columns); auto const user_selected_decimal128 = is_column_in(decimal128_columns); CUDF_EXPECTS(not user_selected_float64 or not user_selected_decimal128, - "Both decimal128 and float64 types selected for column " + column_name); + "Both decimal128 and float64 types selected for column " + column_path); if (user_selected_float64) return type_id::FLOAT64; if (user_selected_decimal128) return type_id::DECIMAL128; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index e54c21efc47..2bf020d08a2 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1776,7 +1776,7 @@ void writer::impl::write(table_view const& table) [&](column_in_metadata& col_meta, std::string default_name) { if (col_meta.get_name().empty()) col_meta.set_name(default_name); for (size_type i = 0; i < col_meta.num_children(); ++i) { - add_default_name(col_meta.child(i), col_meta.get_name() + "." + std::to_string(i)); + add_default_name(col_meta.child(i), std::to_string(i)); } }; for (size_t i = 0; i < table_meta->column_metadata.size(); ++i) { diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 77c4081dbbf..4862bc74fab 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1449,4 +1449,54 @@ TEST_F(OrcReaderTest, DecimalOptions) EXPECT_THROW(cudf_io::read_orc(invalid_opts), cudf::logic_error); } +TEST_F(OrcWriterTest, DecimalOptionsNested) +{ + auto const num_rows = 100; + + auto dec_vals = random_values(num_rows); + auto keys_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal64{dec_vals[i], numeric::scale_type{2}}; + }); + auto vals_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{dec_vals[i], numeric::scale_type{2}}; + }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper keys_col{keys_data, keys_data + num_rows, validity}; + column_wrapper vals_col{vals_data, vals_data + num_rows, validity}; + + auto struct_col = cudf::test::structs_column_wrapper({keys_col, vals_col}).release(); + + std::vector row_offsets(num_rows + 1); + std::iota(row_offsets.begin(), row_offsets.end(), 0); + cudf::test::fixed_width_column_wrapper offsets(row_offsets.begin(), row_offsets.end()); + + auto list_col = + cudf::make_lists_column(num_rows, + offsets.release(), + std::move(struct_col), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(validity, validity + num_rows)); + + table_view expected({*list_col}); + + cudf_io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("lists"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("dec64"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("dec128"); + + auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); + cudf_io::orc_writer_options out_opts = + cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf_io::write_orc(out_opts); + + cudf_io::orc_reader_options in_opts = + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .use_index(false) + .decimal128_columns({"lists.1.dec128"}); + auto result = cudf_io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + CUDF_TEST_PROGRAM_MAIN() From 61b3677cd3f6e7f2c5dc26be4c55681effa41134 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 5 Nov 2021 13:16:22 -0700 Subject: [PATCH 091/112] small clean up --- cpp/src/io/orc/stripe_data.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 7496fd6facd..44f106c4f5c 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -122,14 +122,14 @@ struct orcdec_state_s { orc_rowdec_state_s rowdec; } u; union values { - uint8_t u8[block_size * 8]; - uint32_t u32[block_size * 2]; - int32_t i32[block_size * 2]; - uint64_t u64[block_size]; - int64_t i64[block_size]; - double f64[block_size]; - __int128_t i128[block_size]; // TMP - __uint128_t u128[block_size]; // TMP + uint8_t u8[block_size * 16]; + uint32_t u32[block_size * 4]; + int32_t i32[block_size * 4]; + uint64_t u64[block_size * 2]; + int64_t i64[block_size * 2]; + double f64[block_size * 2]; + __int128_t i128[block_size]; + __uint128_t u128[block_size]; } vals; }; From 7c01f21171de281c48ddd36915ac21437e8fff15 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 16:04:14 -0500 Subject: [PATCH 092/112] ROLLING_TEST fix --- cpp/tests/rolling/rolling_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index 038d692a323..f136fff83da 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -1203,13 +1203,11 @@ TYPED_TEST(FixedPointTests, MinMaxCountLagLeadNulls) using decimalXX = TypeParam; using RepType = cudf::device_storage_type_t; using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using sum_type = std::conditional_t, __int128_t, int64_t>; - using fpsum_wrapper = cudf::test::fixed_point_column_wrapper; using fw_wrapper = cudf::test::fixed_width_column_wrapper; auto const scale = scale_type{-1}; auto const input = fp_wrapper{{42, 1729, 55, 343, 1, 2}, {1, 0, 1, 0, 1, 1}, scale}; - auto const expected_sum = fpsum_wrapper{{42, 97, 55, 56, 3, 3}, {1, 1, 1, 1, 1, 1}, scale}; + auto const expected_sum = fp_wrapper{{42, 97, 55, 56, 3, 3}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_min = fp_wrapper{{42, 42, 55, 1, 1, 1}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_max = fp_wrapper{{42, 55, 55, 55, 2, 2}, {1, 1, 1, 1, 1, 1}, scale}; auto const expected_lag = fp_wrapper{{0, 42, 1729, 55, 343, 1}, {0, 1, 0, 1, 0, 1}, scale}; From 63a00043297b97b4752608df8fbb554ae3394ca1 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 16:07:08 -0500 Subject: [PATCH 093/112] clang-format --- cpp/tests/rolling/rolling_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index f136fff83da..1a31192f6a4 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -1203,7 +1203,7 @@ TYPED_TEST(FixedPointTests, MinMaxCountLagLeadNulls) using decimalXX = TypeParam; using RepType = cudf::device_storage_type_t; using fp_wrapper = cudf::test::fixed_point_column_wrapper; - using fw_wrapper = cudf::test::fixed_width_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; auto const scale = scale_type{-1}; auto const input = fp_wrapper{{42, 1729, 55, 343, 1, 2}, {1, 0, 1, 0, 1, 1}, scale}; From d3c589cd59c672277e6f85366cddad90e6eca22a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 16:12:36 -0500 Subject: [PATCH 094/112] Update meta.yaml --- conda/recipes/libcudf/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 0e0fc816c62..df1f6bb3d37 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -116,6 +116,7 @@ test: - test -f $PREFIX/include/cudf/dictionary/update_keys.hpp - test -f $PREFIX/include/cudf/filling.hpp - test -f $PREFIX/include/cudf/fixed_point/fixed_point.hpp + - test -f $PREFIX/include/cudf/fixed_point/temporary.hpp - test -f $PREFIX/include/cudf/groupby.hpp - test -f $PREFIX/include/cudf/hashing.hpp - test -f $PREFIX/include/cudf/interop.hpp From 27a2e58b490322d4589e5a7b6f2e2b0aa64c9cfb Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 16:25:07 -0500 Subject: [PATCH 095/112] Cmake formatting --- cpp/cmake/thirdparty/get_cucollections.cmake | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 89e45809010..b9ddba27b7d 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -15,16 +15,15 @@ # This function finds cucollections and sets any additional necessary environment variables. function(find_and_configure_cucollections) - # Find or install cuCollections - rapids_cpm_find(cuco 0.0 - GLOBAL_TARGETS cuco::cuco - CPM_ARGS - GITHUB_REPOSITORY robertmaynard/cuCollections - GIT_TAG bf6a90db78516e099d07e845a39012dbcaa8de18 - OPTIONS "BUILD_TESTS OFF" - "BUILD_BENCHMARKS OFF" - "BUILD_EXAMPLES OFF" - ) + # Find or install cuCollections + rapids_cpm_find( + # cuCollections doesn't have a version yet + cuco 0.0 + GLOBAL_TARGETS cuco::cuco + CPM_ARGS GITHUB_REPOSITORY robertmaynard/cuCollections + GIT_TAG bf6a90db78516e099d07e845a39012dbcaa8de18 + OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" + ) endfunction() find_and_configure_cucollections() From 9e2184f9607c2a7667372b6fce50fcee46356cd5 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 17:48:24 -0500 Subject: [PATCH 096/112] Cleaning up has_atomic_support --- cpp/include/cudf/utilities/traits.cuh | 67 +++++++++++++++++++++++++++ cpp/src/groupby/hash/groupby.cu | 22 +-------- 2 files changed, 69 insertions(+), 20 deletions(-) create mode 100644 cpp/include/cudf/utilities/traits.cuh diff --git a/cpp/include/cudf/utilities/traits.cuh b/cpp/include/cudf/utilities/traits.cuh new file mode 100644 index 00000000000..88fcc5dd8c2 --- /dev/null +++ b/cpp/include/cudf/utilities/traits.cuh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace cudf { + +/** + * @addtogroup utility_types + * @{ + * @file + */ + +/** + * @brief Indicates whether the type `T` has support for atomics + * + * @tparam T The type to verify + * @return true `T` has support for atomics + * @return false `T` no support for atomics + */ +template +constexpr inline bool has_atomic_support() +{ + return cuda::std::atomic::is_always_lock_free; +} + +struct has_atomic_support_impl { + template + constexpr bool operator()() + { + return has_atomic_support(); + } +}; + +/** + * @brief Indicates whether `type` has support for atomics + * + * @param type The `data_type` to verify + * @return true `type` has support for atomics + * @return false `type` no support for atomics + */ +constexpr inline bool has_atomic_support(data_type type) +{ + return cudf::type_dispatcher(type, has_atomic_support_impl{}); +} + +/** @} */ + +} // namespace cudf diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index efd80485ac7..f062a132317 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -622,25 +623,6 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, } // namespace -struct has_atomic_support_type_dispatcher { - template - bool operator()() - { - return cuda::std::atomic::is_always_lock_free; - } -}; - -/** - * @brief Indicates whether `type` has support for atomics - * - * @param type The `data_type` that is being checked - * @return `true` if `type` has support for atomics, `false` otherwise - */ -bool has_atomic_support(cudf::data_type const& type) -{ - return type_dispatcher(type, has_atomic_support_type_dispatcher{}); -} - /** * @brief Indicates if a set of aggregation requests can be satisfied with a * hash-based groupby implementation. @@ -654,7 +636,7 @@ bool has_atomic_support(cudf::data_type const& type) bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return has_atomic_support(r.values.type()) and + return cudf::has_atomic_support(r.values.type()) and std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { return is_hash_aggregation(a->kind); }); From 8634dea7a90a688a14320c1f6c8e057e2c7742de Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 18:28:53 -0500 Subject: [PATCH 097/112] Cleanup --- cpp/src/groupby/hash/groupby.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index f062a132317..58d2c7f09d6 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -42,8 +42,8 @@ #include #include #include -#include #include +#include #include #include From 4b5dbe2e787d89e02de341a499c863a4244418e0 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 19:14:31 -0500 Subject: [PATCH 098/112] Use has_atomic_support --- cpp/include/cudf/detail/aggregation/aggregation.cuh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index e05e83991cd..52447d0ba5b 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -139,7 +140,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { using Target = target_type_t; atomicMin(&target.element(target_index), static_cast(source.element(source_index))); @@ -166,7 +167,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { atomicMin(&target.element(target_index), static_cast(source.element(source_index))); } @@ -189,7 +190,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { using Target = target_type_t; atomicMax(&target.element(target_index), static_cast(source.element(source_index))); @@ -216,7 +217,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { atomicMax(&target.element(target_index), static_cast(source.element(source_index))); } @@ -239,7 +240,7 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { using Target = target_type_t; atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); @@ -266,7 +267,7 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (not std::is_same_v) { + if constexpr (cudf::has_atomic_support()) { atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); } From 860bcbbaeae050a5bd5b47521a083a9854393772 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Mon, 8 Nov 2021 19:57:51 -0500 Subject: [PATCH 099/112] Fix silent failure --- cpp/include/cudf/detail/aggregation/aggregation.cuh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 52447d0ba5b..ad3b1042c95 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -144,6 +144,8 @@ struct update_target_element< using Target = target_type_t; atomicMin(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "Source has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } @@ -170,6 +172,8 @@ struct update_target_element()) { atomicMin(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "DeviceSource has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } @@ -194,6 +198,8 @@ struct update_target_element< using Target = target_type_t; atomicMax(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "Source has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } @@ -220,6 +226,8 @@ struct update_target_element()) { atomicMax(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "DeviceSource has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } @@ -244,6 +252,8 @@ struct update_target_element< using Target = target_type_t; atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "Source has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } @@ -270,6 +280,8 @@ struct update_target_element()) { atomicAdd(&target.element(target_index), static_cast(source.element(source_index))); + } else { + cudf_assert(false and "DeviceSource has no atomic support."); } if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } From 89004c7e276c95f5d71b70fd66e689a94b22814f Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Nov 2021 00:14:41 -0500 Subject: [PATCH 100/112] docs cleanup --- cpp/include/cudf/utilities/traits.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/utilities/traits.cuh b/cpp/include/cudf/utilities/traits.cuh index 88fcc5dd8c2..43587ffa583 100644 --- a/cpp/include/cudf/utilities/traits.cuh +++ b/cpp/include/cudf/utilities/traits.cuh @@ -32,9 +32,9 @@ namespace cudf { /** * @brief Indicates whether the type `T` has support for atomics * - * @tparam T The type to verify - * @return true `T` has support for atomics - * @return false `T` no support for atomics + * @tparam T The type to verify + * @return true `T` has support for atomics + * @return false `T` no support for atomics */ template constexpr inline bool has_atomic_support() @@ -53,8 +53,8 @@ struct has_atomic_support_impl { /** * @brief Indicates whether `type` has support for atomics * - * @param type The `data_type` to verify - * @return true `type` has support for atomics + * @param type The `data_type` to verify + * @return true `type` has support for atomics * @return false `type` no support for atomics */ constexpr inline bool has_atomic_support(data_type type) From 12e5b20a22ab36bc2ff5043c785d44bffdf4b700 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Nov 2021 14:55:25 -0500 Subject: [PATCH 101/112] Cleanup --- cpp/src/quantiles/quantiles_util.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/quantiles/quantiles_util.hpp b/cpp/src/quantiles/quantiles_util.hpp index 142b0d18772..a0554833def 100644 --- a/cpp/src/quantiles/quantiles_util.hpp +++ b/cpp/src/quantiles/quantiles_util.hpp @@ -153,10 +153,7 @@ select_quantile(ValueAccessor get_value, size_type size, double q, interpolation } } -template ()>* = - nullptr> // TODO revisit if this is needed +template CUDA_HOST_DEVICE_CALLABLE Result select_quantile_data(Iterator begin, size_type size, double q, interpolation interp) { From 3ef6a09cdc1e08d8b0a57337658788dedd8851d8 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 10 Nov 2021 14:46:51 -0500 Subject: [PATCH 102/112] Additional decimal128 string tests --- cpp/tests/strings/fixed_point_tests.cpp | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 3ceaba2637b..898d2105b63 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -84,6 +84,33 @@ TYPED_TEST(StringsFixedPointConvertTest, ToFixedPointVeryLarge) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } +TEST_F(StringsConvertTest, ToFixedPointVeryLargeDecimal128) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto const strings = cudf::test::strings_column_wrapper( + {"1234000000000000000000", + "-876000000000000000000", + "5432e+17", + "-12E016", + "250000000000000000", + "-2800000000000000", + "", + "-0.0", + "170141183460469231731687303715884105727", + "17014118346046923173168730371588410572700000000000000000000"}); + + auto const type = cudf::data_type{cudf::type_to_id(), scale_type{20}}; + auto const results = cudf::strings::to_fixed_point(cudf::strings_column_view(strings), type); + auto const expected = fp_wrapper{ + {12, -8, 5, 0, 0, 0, 0, 0, 1701411834604692317, cuda::std::numeric_limits<__int128_t>::max()}, + scale_type{20}}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); +} + TYPED_TEST(StringsFixedPointConvertTest, ToFixedPointVerySmall) { using DecimalType = TypeParam; From ec8e74afb19f158ae638d7a7a85ca8ecf80f4dc2 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 11 Nov 2021 11:15:05 -0500 Subject: [PATCH 103/112] count_digits --- cpp/src/strings/convert/utilities.cuh | 44 +++++++++---------------- cpp/tests/strings/fixed_point_tests.cpp | 42 ++++++++++++++++++++--- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/cpp/src/strings/convert/utilities.cuh b/cpp/src/strings/convert/utilities.cuh index 0006592e599..234ecf48f2e 100644 --- a/cpp/src/strings/convert/utilities.cuh +++ b/cpp/src/strings/convert/utilities.cuh @@ -96,38 +96,26 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer) template constexpr size_type count_digits(IntegerType value) { - // TODO definitely broken if (value == 0) return 1; - bool is_negative = cuda::std::is_signed() ? (value < 0) : false; + bool const is_negative = cuda::std::is_signed() ? (value < 0) : false; // abs(std::numeric_limits::min()) is negative; // for all integer types, the max() and min() values have the same number of digits - value = (value == std::numeric_limits::min()) - ? std::numeric_limits::max() + value = (value == cuda::std::numeric_limits::min()) + ? cuda::std::numeric_limits::max() : cudf::util::absolute_value(value); - // largest 8-byte unsigned value is 18446744073709551615 (20 digits) - // clang-format off - size_type digits = - (value < 10 ? 1 : - (value < 100 ? 2 : - (value < 1000 ? 3 : - (value < 10000 ? 4 : - (value < 100000 ? 5 : - (value < 1000000 ? 6 : - (value < 10000000 ? 7 : - (value < 100000000 ? 8 : - (value < 1000000000 ? 9 : - (value < 10000000000 ? 10 : - (value < 100000000000 ? 11 : - (value < 1000000000000 ? 12 : - (value < 10000000000000 ? 13 : - (value < 100000000000000 ? 14 : - (value < 1000000000000000 ? 15 : - (value < 10000000000000000 ? 16 : - (value < 100000000000000000 ? 17 : - (value < 1000000000000000000 ? 18 : - (value < 10000000000000000000 ? 19 : - 20))))))))))))))))))); - // clang-format on + + auto const digits = [value] { + // largest 8-byte unsigned value is 18446744073709551615 (20 digits) + // largest 16-byte unsigned value is 340282366920938463463374607431768211455 (39 digits) + auto constexpr max_digits = std::is_same_v ? 39 : 20; + + size_type digits = 1; + __int128_t pow10 = 10; + for (; digits < max_digits; ++digits, pow10 *= 10) + if (value < pow10) break; + return digits; + }(); + return digits + static_cast(is_negative); } diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 8688ea76800..fe94ffe23d7 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -84,7 +84,7 @@ TYPED_TEST(StringsFixedPointConvertTest, ToFixedPointVeryLarge) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } -TEST_F(StringsConvertTest, ToFixedPointVeryLargeDecimal128) +TEST_F(StringsConvertTest, ToFixedPointDecimal128) { using namespace numeric; using RepType = cudf::device_storage_type_t; @@ -102,15 +102,47 @@ TEST_F(StringsConvertTest, ToFixedPointVeryLargeDecimal128) "170141183460469231731687303715884105727", "17014118346046923173168730371588410572700000000000000000000"}); - auto const type = cudf::data_type{cudf::type_to_id(), scale_type{20}}; + auto const scale = scale_type{20}; + auto const type = cudf::data_type{cudf::type_to_id(), scale}; auto const results = cudf::strings::to_fixed_point(cudf::strings_column_view(strings), type); - auto const expected = fp_wrapper{ - {12, -8, 5, 0, 0, 0, 0, 0, 1701411834604692317, cuda::std::numeric_limits<__int128_t>::max()}, - scale_type{20}}; + auto const max = cuda::std::numeric_limits<__int128_t>::max(); + auto const expected = fp_wrapper{{12, -8, 5, 0, 0, 0, 0, 0, 1701411834604692317, max}, scale}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } +TEST_F(StringsConvertTest, FromFixedPointDecimal128) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + auto const input = fp_wrapper{{110}, numeric::scale_type{-2}}; + auto results = cudf::strings::from_fixed_point(input); + auto const expected = cudf::test::strings_column_wrapper({"1.10"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + + { + auto const input = + fp_wrapper({110, cuda::std::numeric_limits<__int128_t>::max()}, numeric::scale_type{2}); + auto results = cudf::strings::from_fixed_point(input); + auto const expected = + cudf::test::strings_column_wrapper({"11000", "17014118346046923173168730371588410572700"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + + { + auto const input = fp_wrapper({-222}, numeric::scale_type{0}); + auto results = cudf::strings::from_fixed_point(input); + auto const expected = cudf::test::strings_column_wrapper({"-222"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } +} + TYPED_TEST(StringsFixedPointConvertTest, ToFixedPointVerySmall) { using DecimalType = TypeParam; From e365080b2b43929c53f71da592ada402975d2eef Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 11 Nov 2021 16:50:22 -0500 Subject: [PATCH 104/112] final string changes --- cpp/include/cudf/fixed_point/temporary.hpp | 10 ++++++++ .../strings/convert/convert_fixed_point.cu | 5 ++-- cpp/tests/strings/fixed_point_tests.cpp | 23 ++++++++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 49c83090da7..90c98130fdc 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include // Note: The versions are used in order for Jitify to work with our fixed_point type. // Jitify is needed for several algorithms (binaryop, rolling, etc) @@ -72,6 +73,15 @@ CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) { return lhs > rhs ? lhs : rhs; } +template +constexpr auto exp10(int32_t exponent) +{ + CUDF_EXPECTS(exponent >= 0, "Exponent must be greater than 0."); + BaseType value = 1; + while (exponent > 0) + value *= 10, --exponent; + return value; +} } // namespace detail } // namespace numeric diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 847e7c3566a..6944a8eb097 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -210,8 +210,7 @@ struct decimal_to_string_size_fn { if (scale >= 0) return count_digits(value) + scale; auto const abs_value = numeric::detail::abs(value); - auto const exp_ten = static_cast(exp10( - static_cast(-scale))); // TODO probably broken (might need numeric::detail::exp10) + auto const exp_ten = numeric::detail::exp10(-scale); auto const fraction = count_digits(abs_value % exp_ten); auto const num_zeros = std::max(0, (-scale - fraction)); return static_cast(value < 0) + // sign if negative @@ -253,7 +252,7 @@ struct decimal_to_string_fn { // fraction = abs(value) % (10^abs(scale)) if (value < 0) *d_buffer++ = '-'; // add sign auto const abs_value = numeric::detail::abs(value); - auto const exp_ten = static_cast(exp10(static_cast(-scale))); + auto const exp_ten = numeric::detail::exp10(-scale); auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten))); d_buffer += integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index fe94ffe23d7..b96706c5eb2 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -117,18 +117,29 @@ TEST_F(StringsConvertTest, FromFixedPointDecimal128) using RepType = cudf::device_storage_type_t; using fp_wrapper = cudf::test::fixed_point_column_wrapper; + auto constexpr max = cuda::std::numeric_limits<__int128_t>::max(); + { - auto const input = fp_wrapper{{110}, numeric::scale_type{-2}}; - auto results = cudf::strings::from_fixed_point(input); - auto const expected = cudf::test::strings_column_wrapper({"1.10"}); + auto const input = fp_wrapper{{110, max}, numeric::scale_type{-2}}; + auto results = cudf::strings::from_fixed_point(input); + auto const expected = + cudf::test::strings_column_wrapper({"1.10", "1701411834604692317316873037158841057.27"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + + { + auto const input = fp_wrapper{{max}, numeric::scale_type{-38}}; + auto results = cudf::strings::from_fixed_point(input); + auto const expected = + cudf::test::strings_column_wrapper({"1.70141183460469231731687303715884105727"}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { - auto const input = - fp_wrapper({110, cuda::std::numeric_limits<__int128_t>::max()}, numeric::scale_type{2}); - auto results = cudf::strings::from_fixed_point(input); + auto const input = fp_wrapper({110, max}, numeric::scale_type{2}); + auto results = cudf::strings::from_fixed_point(input); auto const expected = cudf::test::strings_column_wrapper({"11000", "17014118346046923173168730371588410572700"}); From a0d5d0cc91fa1c1a58c6b1866418304aa9957081 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 12 Nov 2021 13:44:16 -0500 Subject: [PATCH 105/112] use enable_if --- .../cudf/detail/aggregation/aggregation.cuh | 99 ++++++++----------- 1 file changed, 41 insertions(+), 58 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index ad3b1042c95..85bfdd9c00d 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -132,7 +132,8 @@ struct update_target_element< aggregation::MIN, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -140,24 +141,19 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (cudf::has_atomic_support()) { - using Target = target_type_t; - atomicMin(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "Source has no atomic support."); - } + using Target = target_type_t; + atomicMin(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } }; template -struct update_target_element()>> { + struct update_target_element < Source, + aggregation::MIN, target_has_nulls, source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -169,12 +165,8 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (cudf::has_atomic_support()) { - atomicMin(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "DeviceSource has no atomic support."); - } + atomicMin(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -186,7 +178,8 @@ struct update_target_element< aggregation::MAX, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -194,24 +187,22 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (cudf::has_atomic_support()) { - using Target = target_type_t; - atomicMax(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "Source has no atomic support."); - } + using Target = target_type_t; + atomicMax(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } }; template -struct update_target_element()>> { +struct update_target_element< + Source, + aggregation::MAX, + target_has_nulls, + source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -223,12 +214,8 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (cudf::has_atomic_support()) { - atomicMax(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "DeviceSource has no atomic support."); - } + atomicMax(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -240,7 +227,8 @@ struct update_target_element< aggregation::SUM, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -248,24 +236,22 @@ struct update_target_element< { if (source_has_nulls and source.is_null(source_index)) { return; } - if constexpr (cudf::has_atomic_support()) { - using Target = target_type_t; - atomicAdd(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "Source has no atomic support."); - } + using Target = target_type_t; + atomicAdd(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } }; template -struct update_target_element()>> { +struct update_target_element< + Source, + aggregation::SUM, + target_has_nulls, + source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -277,12 +263,8 @@ struct update_target_element; using DeviceSource = device_storage_type_t; - if constexpr (cudf::has_atomic_support()) { - atomicAdd(&target.element(target_index), - static_cast(source.element(source_index))); - } else { - cudf_assert(false and "DeviceSource has no atomic support."); - } + atomicAdd(&target.element(target_index), + static_cast(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } } @@ -292,7 +274,8 @@ struct update_target_element From dd379506bd7dcbcfa0b0b11e1f2c135d9bc12a3e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 12 Nov 2021 13:49:06 -0500 Subject: [PATCH 106/112] clang-format --- cpp/include/cudf/detail/aggregation/aggregation.cuh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 85bfdd9c00d..47aa7d18489 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -150,8 +150,11 @@ struct update_target_element< }; template - struct update_target_element < Source, - aggregation::MIN, target_has_nulls, source_has_nulls, +struct update_target_element< + Source, + aggregation::MIN, + target_has_nulls, + source_has_nulls, std::enable_if_t() && cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, From fc4c1d1cbb5a97053122a102bc6bf93668b961b2 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Fri, 12 Nov 2021 15:36:56 -0500 Subject: [PATCH 107/112] Fix fix --- cpp/include/cudf/fixed_point/temporary.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 90c98130fdc..360794c8ade 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include // Note: The versions are used in order for Jitify to work with our fixed_point type. // Jitify is needed for several algorithms (binaryop, rolling, etc) @@ -76,7 +75,6 @@ CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) template constexpr auto exp10(int32_t exponent) { - CUDF_EXPECTS(exponent >= 0, "Exponent must be greater than 0."); BaseType value = 1; while (exponent > 0) value *= 10, --exponent; From 08da157ee591fca0aaf036ce3a76958078f9025a Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Sun, 14 Nov 2021 20:53:20 -0500 Subject: [PATCH 108/112] Cleanup --- cpp/include/cudf/fixed_point/temporary.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 360794c8ade..2b50e273517 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -72,6 +72,7 @@ CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) { return lhs > rhs ? lhs : rhs; } + template constexpr auto exp10(int32_t exponent) { From 201a091f64e06d58472020dad4760e76d204f900 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 16 Nov 2021 11:35:45 -0500 Subject: [PATCH 109/112] is_chrono min/max identity --- cpp/include/cudf/detail/utilities/device_operators.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 0c85f448134..95605dc8a71 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -129,7 +129,7 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (cudf::is_chrono()) return std::numeric_limits::max(); + if constexpr (cudf::is_chrono()) return T::max(); return cuda::std::numeric_limits::max(); } @@ -171,7 +171,7 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - if constexpr (cudf::is_chrono()) return std::numeric_limits::lowest(); + if constexpr (cudf::is_chrono()) return T::min(); return cuda::std::numeric_limits::lowest(); } From f0afd8dcbf788ff5b13b8cbc367584182bb046a7 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 16 Nov 2021 12:57:18 -0500 Subject: [PATCH 110/112] Use exp10 --- cpp/include/cudf/fixed_point/fixed_point.hpp | 2 +- cpp/tests/strings/fixed_point_tests.cpp | 24 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index b356d857f32..e8223b53997 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -555,7 +555,7 @@ class fixed_point { { if (_scale < 0) { auto const av = detail::abs(_value); - Rep const n = std::pow(10, -_scale); // does this work for all values of __int128 + Rep const n = detail::exp10(-_scale); Rep const f = av % n; auto const num_zeros = std::max(0, (-_scale - static_cast(detail::to_string(f).size()))); diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index b96706c5eb2..d5ad57e7958 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -301,3 +302,26 @@ TEST_F(StringsConvertTest, IsFixedPoint) {true, true, true, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled); } + +TEST_F(StringsConvertTest, FixedPointStringConversionOperator) +{ + auto const max = cuda::std::numeric_limits<__int128_t>::max(); + + auto const x = numeric::decimal128{max, numeric::scale_type{-10}}; + EXPECT_EQ(static_cast(x), "17014118346046923173168730371.5884105727"); + + auto const y = numeric::decimal128{max, numeric::scale_type{10}}; + EXPECT_EQ(static_cast(y), "170141183460469231731687303710000000000"); + + auto const z = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{10}}}; + EXPECT_EQ(static_cast(z), "1701411834604692317316873037158841057270000000000"); + + auto const a = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{40}}}; + EXPECT_EQ(static_cast(a), "1701411834604692317316873037158841057270000000000000000000000000000000000000000"); + + auto const b = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{-20}}}; + EXPECT_EQ(static_cast(b), "1701411834604692317.31687303715884105727"); + + auto const c = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{-38}}}; + EXPECT_EQ(static_cast(c), "1.70141183460469231731687303715884105727"); +} \ No newline at end of file From 95ee95c633d49670c15bb4f63142ad710292f6c5 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 16 Nov 2021 14:00:06 -0500 Subject: [PATCH 111/112] clang-format --- cpp/tests/strings/fixed_point_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index d5ad57e7958..7c188d39f6f 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -317,7 +317,8 @@ TEST_F(StringsConvertTest, FixedPointStringConversionOperator) EXPECT_EQ(static_cast(z), "1701411834604692317316873037158841057270000000000"); auto const a = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{40}}}; - EXPECT_EQ(static_cast(a), "1701411834604692317316873037158841057270000000000000000000000000000000000000000"); + EXPECT_EQ(static_cast(a), + "1701411834604692317316873037158841057270000000000000000000000000000000000000000"); auto const b = numeric::decimal128{numeric::scaled_integer{max, numeric::scale_type{-20}}}; EXPECT_EQ(static_cast(b), "1701411834604692317.31687303715884105727"); From 0b7c32e1821460bba38410cc44f2be0e430eb7ad Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 17 Nov 2021 04:11:22 +0530 Subject: [PATCH 112/112] Writer changes --- cpp/src/io/parquet/chunk_dict.cu | 1 + cpp/src/io/parquet/page_enc.cu | 29 +++++++++++++++-- cpp/src/io/parquet/writer_impl.cu | 9 ++++-- cpp/tests/io/parquet_test.cpp | 52 +++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 64b3dd69c0d..9617fca0af3 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -188,6 +188,7 @@ __global__ void __launch_bounds__(block_size, 1) return 4 + data_col.element(val_idx).size_bytes(); } case Type::FIXED_LEN_BYTE_ARRAY: + if (data_col.type().id() == type_id::DECIMAL128) { return 16; } default: cudf_assert(false && "Unsupported type for dictionary encoding"); return 0; } }(); diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 48490426db7..1af62d998be 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -176,7 +176,8 @@ __global__ void __launch_bounds__(block_size) } } dtype = s->col.physical_type; - dtype_len = (dtype == INT96) ? 12 + dtype_len = (dtype == FIXED_LEN_BYTE_ARRAY) ? 16 + : (dtype == INT96) ? 12 : (dtype == INT64 || dtype == DOUBLE) ? 8 : (dtype == BOOLEAN) ? 1 : 4; @@ -878,7 +879,8 @@ __global__ void __launch_bounds__(128, 8) // Encode data values __syncthreads(); dtype = s->col.physical_type; - dtype_len_out = (dtype == INT96) ? 12 + dtype_len_out = (dtype == FIXED_LEN_BYTE_ARRAY) ? 16 + : (dtype == INT96) ? 12 : (dtype == INT64 || dtype == DOUBLE) ? 8 : (dtype == BOOLEAN) ? 1 : 4; @@ -1087,6 +1089,29 @@ __global__ void __launch_bounds__(128, 8) dst[pos + 3] = v >> 24; if (v != 0) memcpy(dst + pos + 4, str.data(), v); } break; + case FIXED_LEN_BYTE_ARRAY: { + if (s->col.leaf_column->type().id() == type_id::DECIMAL128) { + // When using FIXED_LEN_BYTE_ARRAY for decimals, the rep is encoded in big-endian + auto v = s->col.leaf_column->element(val_idx).value(); + auto v_ = reinterpret_cast(&v); + dst[pos + 0] = v_[15]; + dst[pos + 1] = v_[14]; + dst[pos + 2] = v_[13]; + dst[pos + 3] = v_[12]; + dst[pos + 4] = v_[11]; + dst[pos + 5] = v_[10]; + dst[pos + 6] = v_[9]; + dst[pos + 7] = v_[8]; + dst[pos + 8] = v_[7]; + dst[pos + 9] = v_[6]; + dst[pos + 10] = v_[5]; + dst[pos + 11] = v_[4]; + dst[pos + 12] = v_[3]; + dst[pos + 13] = v_[2]; + dst[pos + 14] = v_[1]; + dst[pos + 15] = v_[0]; + } + } break; } } __syncthreads(); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 2c7d745bb4c..c01a4f2f9d1 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -343,7 +343,9 @@ struct leaf_schema_fn { col_schema.type = Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_decimal64; } else if (std::is_same_v) { - CUDF_FAIL("decimal128 currently not supported for parquet writer"); + col_schema.type = Type::FIXED_LEN_BYTE_ARRAY; + col_schema.type_length = 16; + col_schema.stats_dtype = statistics_dtype::dtype_decimal128; } else { CUDF_FAIL("Unsupported fixed point type for parquet writer"); } @@ -1208,8 +1210,9 @@ void writer::impl::write(table_view const& table) hostdevice_2dvector chunks(num_rowgroups, num_columns, stream); for (uint32_t r = 0, global_r = global_rowgroup_base, f = 0, start_row = 0; r < num_rowgroups; r++, global_r++) { - uint32_t fragments_in_chunk = (uint32_t)( - (md.row_groups[global_r].num_rows + max_page_fragment_size - 1) / max_page_fragment_size); + uint32_t fragments_in_chunk = + (uint32_t)((md.row_groups[global_r].num_rows + max_page_fragment_size - 1) / + max_page_fragment_size); md.row_groups[global_r].total_byte_size = 0; md.row_groups[global_r].columns.resize(num_columns); for (int i = 0; i < num_columns; i++) { diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 3bae8d7ab1e..1438c3a28c0 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -463,6 +463,58 @@ TEST_F(ParquetWriterTest, MultiColumn) cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } +TEST_F(ParquetWriterTest, DecimalColumns) +{ + constexpr auto num_rows = 5; + + // auto col0_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col7_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal32{col6_vals[i], numeric::scale_type{5}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal64{col6_vals[i], numeric::scale_type{5}}; + }); + auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal128{i * 10000, numeric::scale_type{2}}; + }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + // column_wrapper col0{ + // col0_data.begin(), col0_data.end(), validity}; + column_wrapper col6{col6_data, col6_data + num_rows, validity}; + column_wrapper col7{col7_data, col7_data + num_rows, validity}; + column_wrapper col8{col8_data, col8_data + num_rows, validity}; + + std::vector> cols; + // cols.push_back(col0.release()); + cols.push_back(col6.release()); + cols.push_back(col7.release()); + cols.push_back(col8.release()); + auto expected = std::make_unique
(std::move(cols)); + EXPECT_EQ(3, expected->num_columns()); + + cudf_io::table_input_metadata expected_metadata(*expected); + // expected_metadata.column_metadata[0].set_name( "bools"); + expected_metadata.column_metadata[0].set_name("decimal32s").set_decimal_precision(10); + expected_metadata.column_metadata[1].set_name("decimal64s").set_decimal_precision(10); + expected_metadata.column_metadata[2].set_name("decimal128s").set_decimal_precision(10); + + auto filepath = ("MultiColumn.parquet"); + cudf_io::parquet_writer_options out_opts = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view()) + .metadata(&expected_metadata); + cudf_io::write_parquet(out_opts); + + cudf_io::parquet_reader_options in_opts = + cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + auto result = cudf_io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + TEST_F(ParquetWriterTest, MultiColumnWithNulls) { constexpr auto num_rows = 100;