From 5491cc789bbfbaad7099124dcfe004719e7f013c Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Thu, 2 Dec 2021 03:30:50 +0530 Subject: [PATCH] Fix memory error due to lambda return type deduction limitation (#9778) Fixes #9703 replace device lambda with device functor with return type. (due to [14. extended-lambda-restrictions](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#extended-lambda-restrictions) ) ~add `__host__` to lambda for nvcc return type deduction to work properly.~ ~replaced `auto` (generic lambda) with `size_type`.~ fixes shared memory write error caused in #9703 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/9778 --- cpp/src/sort/rank.cu | 13 +++++++++---- cpp/tests/sort/rank_test.cpp | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index e9589e6c4b3..de0a44e3234 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -194,6 +194,12 @@ void rank_max(cudf::device_span group_keys, stream); } +// Returns index, count +template +struct index_counter { + __device__ T operator()(size_type i) { return T{i, 1}; } +}; + void rank_average(cudf::device_span group_keys, column_view sorted_order_view, mutable_column_view rank_mutable_view, @@ -208,10 +214,9 @@ void rank_average(cudf::device_span group_keys, using MinCount = thrust::pair; tie_break_ranks_transform( group_keys, - cudf::detail::make_counting_transform_iterator(1, - [] __device__(auto i) { - return MinCount{i, 1}; - }), + // Use device functor with return type. Cannot use device lambda due to limitation. + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#extended-lambda-restrictions + cudf::detail::make_counting_transform_iterator(1, index_counter{}), sorted_order_view, rank_mutable_view.begin(), [] __device__(auto rank_count1, auto rank_count2) { diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index 94e389fc7ce..926ad1e203e 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -410,5 +410,19 @@ TYPED_TEST(Rank, min_desc_bottom_pct) this->run_all_tests(rank_method::MIN, desc_bottom, col1_rank, col2_rank, col3_rank, true); } +struct RankLarge : public BaseFixture { +}; + +TEST_F(RankLarge, average_large) +{ + // testcase of https://github.com/rapidsai/cudf/issues/9703 + auto iter = thrust::counting_iterator(0); + fixed_width_column_wrapper col1(iter, iter + 10558); + auto result = + cudf::rank(col1, rank_method::AVERAGE, {}, null_policy::EXCLUDE, null_order::AFTER, false); + fixed_width_column_wrapper expected(iter + 1, iter + 10559); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); +} + } // namespace test } // namespace cudf