From 5f05c180b80b70fc09ea58aef2494c57edc44b9c Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:32:12 -0400 Subject: [PATCH] Enable indexalator for device code (#14206) Enables indexalator to be instantiated from device code. Also add gtests for the output indexalator. This change helps enable for the offset-normalizing-iterator #14234 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/14206 --- .../cudf/detail/normalizing_iterator.cuh | 40 +++++-- cpp/tests/iterator/indexalator_test.cu | 100 ++++++++++++++++++ 2 files changed, 131 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 51b3133f84f..35a695d47df 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -34,7 +34,7 @@ namespace detail { */ template struct base_normalator { - static_assert(std::is_integral_v); + static_assert(cudf::is_index_type()); using difference_type = std::ptrdiff_t; using value_type = Integer; using pointer = Integer*; @@ -202,13 +202,34 @@ struct base_normalator { return static_cast(*this).p_ >= rhs.p_; } + private: + struct integer_sizeof_fn { + template ()>* = nullptr> + CUDF_HOST_DEVICE constexpr std::size_t operator()() const + { +#ifndef __CUDA_ARCH__ + CUDF_FAIL("only integral types are supported"); +#else + CUDF_UNREACHABLE("only integral types are supported"); +#endif + } + template ()>* = nullptr> + CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept + { + return sizeof(T); + } + }; + protected: /** * @brief Constructor assigns width and type member variables for base class. */ - explicit base_normalator(data_type dtype) : width_(size_of(dtype)), dtype_(dtype) {} + explicit CUDF_HOST_DEVICE base_normalator(data_type dtype) : dtype_(dtype) + { + width_ = static_cast(type_dispatcher(dtype, integer_sizeof_fn{})); + } - int width_; /// integer type width = 1,2,4, or 8 + int32_t width_; /// integer type width = 1,2,4, or 8 data_type dtype_; /// for type-dispatcher calls }; @@ -244,12 +265,12 @@ struct input_normalator : base_normalator, Integer> { * @brief Dispatch functor for resolving a Integer value from any integer type */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const* tp) { return static_cast(*static_cast(tp)); } - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const*) { CUDF_UNREACHABLE("only integral types are supported"); @@ -274,9 +295,10 @@ struct input_normalator : base_normalator, Integer> { * @param data Pointer to an integer array in device memory. * @param data_type Type of data in data */ - input_normalator(void const* data, data_type dtype) + CUDF_HOST_DEVICE input_normalator(void const* data, data_type dtype, cudf::size_type offset = 0) : base_normalator, Integer>(dtype), p_{static_cast(data)} { + p_ += offset * this->width_; } char const* p_; /// pointer to the integer data in device memory @@ -327,12 +349,12 @@ struct output_normalator : base_normalator, Integer> * @brief Dispatch functor for setting the index value from a size_type value. */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void* tp, Integer const value) { (*static_cast(tp)) = static_cast(value); } - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void*, Integer const) { CUDF_UNREACHABLE("only index types are supported"); @@ -355,7 +377,7 @@ struct output_normalator : base_normalator, Integer> * @param data Pointer to an integer array in device memory. * @param data_type Type of data in data */ - output_normalator(void* data, data_type dtype) + CUDF_HOST_DEVICE output_normalator(void* data, data_type dtype) : base_normalator, Integer>(dtype), p_{static_cast(data)} { } diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 1ff7f4c42a5..3e8bcd5cb0d 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -20,9 +20,13 @@ #include +#include +#include #include #include #include +#include +#include using TestingTypes = cudf::test::IntegralTypesNotBool; @@ -94,3 +98,99 @@ TYPED_TEST(IndexalatorTest, optional_iterator) auto it_dev = cudf::detail::indexalator_factory::make_input_optional_iterator(d_col); this->iterator_test_thrust(expected_values, it_dev, host_values.size()); } + +template +struct transform_fn { + __device__ cudf::size_type operator()(Integer v) + { + return static_cast(v) + static_cast(v); + } +}; + +TYPED_TEST(IndexalatorTest, output_iterator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto itr = cudf::detail::indexalator_factory::make_output_iterator(d_col2); + auto input = cudf::column_view(d_col1); + auto stream = cudf::get_default_stream(); + + auto map = cudf::test::fixed_width_column_wrapper({0, 2, 4, 6, 8, 1, 3, 5, 7}); + auto d_map = cudf::column_view(map); + thrust::gather( + rmm::exec_policy_nosync(stream), d_map.begin(), d_map.end(), input.begin(), itr); + auto expected = + cudf::test::fixed_width_column_wrapper({0, 7, 23, 43, 63, 6, 14, 33, 45}); + thrust::scatter( + rmm::exec_policy_nosync(stream), input.begin(), input.end(), d_map.begin(), itr); + expected = + cudf::test::fixed_width_column_wrapper({0, 33, 6, 43, 7, 45, 14, 63, 23}); + + thrust::transform( + rmm::exec_policy(stream), input.begin(), input.end(), itr, transform_fn{}); + expected = + cudf::test::fixed_width_column_wrapper({0, 12, 14, 28, 46, 66, 86, 90, 126}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::fill(rmm::exec_policy(stream), itr, itr + input.size(), 77); + expected = + cudf::test::fixed_width_column_wrapper({77, 77, 77, 77, 77, 77, 77, 77, 77}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::sequence(rmm::exec_policy(stream), itr, itr + input.size()); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + auto indices = + cudf::test::fixed_width_column_wrapper({0, 10, 20, 30, 40, 50, 60, 70, 80}); + auto d_indices = cudf::column_view(indices); + thrust::lower_bound(rmm::exec_policy(stream), + d_indices.begin(), + d_indices.end(), + input.begin(), + input.end(), + itr); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 1, 2, 3, 4, 5, 5, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} + +/** + * For testing creating and using the indexalator in device code. + */ +struct device_functor_fn { + cudf::column_device_view const d_col; + __device__ cudf::size_type operator()(cudf::size_type idx) + { + auto itr = cudf::detail::input_indexalator(d_col.head(), d_col.type()); + return itr[idx] * 3; + } +}; + +TYPED_TEST(IndexalatorTest, device_indexalator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto input = cudf::column_view(d_col1); + auto output = cudf::mutable_column_view(d_col2); + auto stream = cudf::get_default_stream(); + + auto d_input = cudf::column_device_view::create(input, stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + output.begin(), + device_functor_fn{*d_input}); + + auto expected = + cudf::test::fixed_width_column_wrapper({0, 18, 21, 42, 69, 99, 129, 135, 189}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +}