From a78fdd51ca994d2072085f5c70b33fc3452a3484 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 7 Mar 2023 07:54:41 -0500 Subject: [PATCH] Fix cudf::hash_partition kernel launch error with decimal128 types (#12863) Fixes `cudf::hash_partition` error when using `decimal128` column types. The internal optimized path, `copy_block_partitions`, uses shared-memory for copying fixed-width type column elements. For `int128_t` type, the shared-memory needed (~64KB) is larger than the maximum size (~48KB) allowed causing a kernel launch failure. The optimized path is now restricted to only fixed-width types `int64_t` and below. The `int128_t` column types will fall through to the gather-map pattern instead. Accommodating this type in the existing copy-block implementation would likely penalize the performance of the other fixed-width types. If the new implementation becomes insufficient, we could explore a special optimized path in the future for the single type `int128_t`. An existing gtest for fixed-point types was updated to include a `decimal128` column to catch this kind of error in the future. Closes #12852 Authors: - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Nghia Truong (https://github.com/ttnghia) - Divye Gala (https://github.com/divyegala) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12863 --- cpp/src/partitioning/partitioning.cu | 24 +++++++++++++------ .../partitioning/hash_partition_test.cpp | 14 +++++------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index edf5d6d6612..54dffc85aca 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -36,6 +35,9 @@ #include #include +#include +#include + namespace cudf { namespace { // Launch configuration for optimized hash partition @@ -389,7 +391,15 @@ rmm::device_uvector compute_gather_map(size_type num_rows, } struct copy_block_partitions_dispatcher { - template ()>* = nullptr> + template + constexpr static bool is_copy_block_supported() + { + // The shared-memory used for fixed-width types in the copy_block_partitions_impl function + // will be too large for any DataType greater than int64_t. + return is_fixed_width() && (sizeof(DataType) <= sizeof(int64_t)); + } + + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -416,7 +426,7 @@ struct copy_block_partitions_dispatcher { return std::make_unique(input.type(), input.size(), std::move(output)); } - template ()>* = nullptr> + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -713,7 +723,7 @@ struct dispatch_map_type { } // namespace namespace detail { -namespace local { +namespace { template