From 32e1029ab95659acd8ec77cae1f187cefaf51e9c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 26 Sep 2023 23:27:00 -0400 Subject: [PATCH 01/20] Enable indexalator for device code --- .../cudf/detail/normalizing_iterator.cuh | 30 +++++- cpp/tests/iterator/indexalator_test.cu | 100 ++++++++++++++++++ 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 51b3133f84f..71f13c432d6 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -202,11 +202,32 @@ struct base_normalator { return static_cast(*this).p_ >= rhs.p_; } + private: + struct integer_sizeof_fn { + template ()>* = nullptr> + constexpr int operator()() const + { +#ifndef __CUDA_ARCH__ + CUDF_FAIL("only integral types are supported"); +#else + CUDF_UNREACHABLE("only integral types are supported"); +#endif + } + template ()>* = nullptr> + constexpr int operator()() const noexcept + { + return sizeof(T); + } + }; + protected: /** * @brief Constructor assigns width and type member variables for base class. */ - explicit base_normalator(data_type dtype) : width_(size_of(dtype)), dtype_(dtype) {} + explicit CUDF_HOST_DEVICE base_normalator(data_type dtype) : dtype_(dtype) + { + width_ = type_dispatcher(dtype, integer_sizeof_fn{}); + } int width_; /// integer type width = 1,2,4, or 8 data_type dtype_; /// for type-dispatcher calls @@ -274,9 +295,10 @@ struct input_normalator : base_normalator, Integer> { * @param data Pointer to an integer array in device memory. * @param data_type Type of data in data */ - input_normalator(void const* data, data_type dtype) + CUDF_HOST_DEVICE input_normalator(void const* data, data_type dtype, cudf::size_type offset = 0) : base_normalator, Integer>(dtype), p_{static_cast(data)} { + p_ += offset * this->width_; } char const* p_; /// pointer to the integer data in device memory @@ -309,7 +331,7 @@ struct output_normalator : base_normalator, Integer> * @brief Indirection operator returns this iterator instance in order * to capture the `operator=(Integer)` calls. */ - __device__ inline output_normalator const& operator*() const { return *this; } + __device__ inline output_normalator const operator*() const { return *this; } /** * @brief Array subscript operator returns an iterator instance at the specified `idx` position. @@ -355,7 +377,7 @@ struct output_normalator : base_normalator, Integer> * @param data Pointer to an integer array in device memory. * @param data_type Type of data in data */ - output_normalator(void* data, data_type dtype) + CUDF_HOST_DEVICE output_normalator(void* data, data_type dtype) : base_normalator, Integer>(dtype), p_{static_cast(data)} { } diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 1ff7f4c42a5..3e8bcd5cb0d 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -20,9 +20,13 @@ #include +#include +#include #include #include #include +#include +#include using TestingTypes = cudf::test::IntegralTypesNotBool; @@ -94,3 +98,99 @@ TYPED_TEST(IndexalatorTest, optional_iterator) auto it_dev = cudf::detail::indexalator_factory::make_input_optional_iterator(d_col); this->iterator_test_thrust(expected_values, it_dev, host_values.size()); } + +template +struct transform_fn { + __device__ cudf::size_type operator()(Integer v) + { + return static_cast(v) + static_cast(v); + } +}; + +TYPED_TEST(IndexalatorTest, output_iterator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto itr = cudf::detail::indexalator_factory::make_output_iterator(d_col2); + auto input = cudf::column_view(d_col1); + auto stream = cudf::get_default_stream(); + + auto map = cudf::test::fixed_width_column_wrapper({0, 2, 4, 6, 8, 1, 3, 5, 7}); + auto d_map = cudf::column_view(map); + thrust::gather( + rmm::exec_policy_nosync(stream), d_map.begin(), d_map.end(), input.begin(), itr); + auto expected = + cudf::test::fixed_width_column_wrapper({0, 7, 23, 43, 63, 6, 14, 33, 45}); + thrust::scatter( + rmm::exec_policy_nosync(stream), input.begin(), input.end(), d_map.begin(), itr); + expected = + cudf::test::fixed_width_column_wrapper({0, 33, 6, 43, 7, 45, 14, 63, 23}); + + thrust::transform( + rmm::exec_policy(stream), input.begin(), input.end(), itr, transform_fn{}); + expected = + cudf::test::fixed_width_column_wrapper({0, 12, 14, 28, 46, 66, 86, 90, 126}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::fill(rmm::exec_policy(stream), itr, itr + input.size(), 77); + expected = + cudf::test::fixed_width_column_wrapper({77, 77, 77, 77, 77, 77, 77, 77, 77}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::sequence(rmm::exec_policy(stream), itr, itr + input.size()); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + auto indices = + cudf::test::fixed_width_column_wrapper({0, 10, 20, 30, 40, 50, 60, 70, 80}); + auto d_indices = cudf::column_view(indices); + thrust::lower_bound(rmm::exec_policy(stream), + d_indices.begin(), + d_indices.end(), + input.begin(), + input.end(), + itr); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 1, 2, 3, 4, 5, 5, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} + +/** + * For testing creating and using the indexalator in device code. + */ +struct device_functor_fn { + cudf::column_device_view const d_col; + __device__ cudf::size_type operator()(cudf::size_type idx) + { + auto itr = cudf::detail::input_indexalator(d_col.head(), d_col.type()); + return itr[idx] * 3; + } +}; + +TYPED_TEST(IndexalatorTest, device_indexalator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto input = cudf::column_view(d_col1); + auto output = cudf::mutable_column_view(d_col2); + auto stream = cudf::get_default_stream(); + + auto d_input = cudf::column_device_view::create(input, stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + output.begin(), + device_functor_fn{*d_input}); + + auto expected = + cudf::test::fixed_width_column_wrapper({0, 18, 21, 42, 69, 99, 129, 135, 189}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} From f6419b48636f8bf58424bfc1eeb74438cecf6ad1 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 27 Sep 2023 18:41:39 -0400 Subject: [PATCH 02/20] return ref experiment --- cpp/include/cudf/detail/normalizing_iterator.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 71f13c432d6..96579a9aa51 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -331,7 +331,7 @@ struct output_normalator : base_normalator, Integer> * @brief Indirection operator returns this iterator instance in order * to capture the `operator=(Integer)` calls. */ - __device__ inline output_normalator const operator*() const { return *this; } + __device__ inline output_normalator const& operator*() const { return *this; } /** * @brief Array subscript operator returns an iterator instance at the specified `idx` position. From 0e369dde5094434f7fd9d3c4106a93fcedecdb96 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 28 Sep 2023 17:03:32 -0400 Subject: [PATCH 03/20] Normalizing offsets iterator --- .../cudf/column/column_device_view.cuh | 8 +- cpp/include/cudf/detail/offsets_iterator.cuh | 78 +++++++++++++++++++ 2 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 cpp/include/cudf/detail/offsets_iterator.cuh diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 35851a99822..f964aabf8e5 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -442,10 +443,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base { __device__ T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset - auto const* d_offsets = d_children[strings_column_view::offsets_column_index].data(); char const* d_strings = d_children[strings_column_view::chars_column_index].data(); - size_type offset = d_offsets[index]; - return string_view{d_strings + offset, d_offsets[index + 1] - offset}; + auto const offsets = d_children[strings_column_view::offsets_column_index]; + auto const itr = cudf::detail::input_offsetsalator(offsets.head(), offsets.type()); + auto const offset = itr[index]; + return string_view{d_strings + offset, static_cast(itr[index + 1] - offset)}; } private: diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh new file mode 100644 index 00000000000..ee48a481e4d --- /dev/null +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace detail { + +/** + * @brief The index normalizing input iterator. + * + * This is an iterator that can be used for index types (integers) without + * requiring a type-specific instance. It can be used for any iterator + * interface for reading an array of integer values of type + * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. + * Reading specific elements always return a `size_type` integer. + * + * Use the indexalator_factory to create an appropriate input iterator + * from a column_view. + * + * Example input iterator usage. + * @code + * auto begin = indexalator_factory::create_input_iterator(gather_map); + * auto end = begin + gather_map.size(); + * auto result = detail::gather( source, begin, end, IGNORE, stream, mr ); + * @endcode + * + * @code + * auto begin = indexalator_factory::create_input_iterator(indices); + * auto end = begin + indices.size(); + * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); + * @endcode + */ +using input_offsetsalator = input_normalator; + +/** + * @brief The index normalizing output iterator. + * + * This is an iterator that can be used for index types (integers) without + * requiring a type-specific instance. It can be used for any iterator + * interface for writing an array of integer values of type + * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. + * Setting specific elements always accept `size_type` integer values. + * + * Use the indexalator_factory to create an appropriate output iterator + * from a mutable_column_view. + * + * Example output iterator usage. + * @code + * auto result_itr = indexalator_factory::create_output_iterator(indices->mutable_view()); + * thrust::lower_bound(rmm::exec_policy(stream), + * input->begin(), + * input->end(), + * values->begin(), + * values->end(), + * result_itr, + * thrust::less()); + * @endcode + */ +using output_offsetsalator = output_normalator; + +} // namespace detail +} // namespace cudf From 7dcb134b22ca347a9e809ff5c77a1096a0d3bf99 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 4 Oct 2023 20:56:17 -0400 Subject: [PATCH 04/20] 23.12 baseline compile-time commit --- cpp/include/cudf/types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index addab160b6e..b83f7adabab 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -69,7 +69,7 @@ template class duration_scalar_device_view; class table; class table_view; -class mutable_table_view; +// class mutable_table_view; /** * @addtogroup utility_types From 88f6dff50cd6460b36442331876e7ebc8bbbef63 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 5 Oct 2023 18:38:38 -0400 Subject: [PATCH 05/20] undo temp change --- cpp/include/cudf/types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index b83f7adabab..addab160b6e 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -69,7 +69,7 @@ template class duration_scalar_device_view; class table; class table_view; -// class mutable_table_view; +class mutable_table_view; /** * @addtogroup utility_types From 081cb8426a48c8b97f7b0789932c7d2c2704582e Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 6 Oct 2023 10:42:25 -0400 Subject: [PATCH 06/20] use cudf::is_index_type --- cpp/include/cudf/detail/normalizing_iterator.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 96579a9aa51..24f27a0f79b 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -204,7 +204,7 @@ struct base_normalator { private: struct integer_sizeof_fn { - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const { #ifndef __CUDA_ARCH__ @@ -213,7 +213,7 @@ struct base_normalator { CUDF_UNREACHABLE("only integral types are supported"); #endif } - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const noexcept { return sizeof(T); From a28a9ff27c2ed19ce2ae5da6dc38416a4070c97b Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 6 Oct 2023 11:05:29 -0400 Subject: [PATCH 07/20] use cudf::is_index_type part 2 --- cpp/include/cudf/detail/normalizing_iterator.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 24f27a0f79b..55b6f566684 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -34,7 +34,7 @@ namespace detail { */ template struct base_normalator { - static_assert(std::is_integral_v); + static_assert(cudf::is_index_type()); using difference_type = std::ptrdiff_t; using value_type = Integer; using pointer = Integer*; @@ -265,12 +265,12 @@ struct input_normalator : base_normalator, Integer> { * @brief Dispatch functor for resolving a Integer value from any integer type */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const* tp) { return static_cast(*static_cast(tp)); } - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const*) { CUDF_UNREACHABLE("only integral types are supported"); @@ -349,12 +349,12 @@ struct output_normalator : base_normalator, Integer> * @brief Dispatch functor for setting the index value from a size_type value. */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void* tp, Integer const value) { (*static_cast(tp)) = static_cast(value); } - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void*, Integer const) { CUDF_UNREACHABLE("only index types are supported"); From ccc5bf55cf7cf897714170f8ffd7ce8ad711ab42 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 6 Oct 2023 14:12:42 -0400 Subject: [PATCH 08/20] add offsetalator factory --- .../cudf/column/column_device_view.cuh | 2 +- cpp/include/cudf/detail/offsets_iterator.cuh | 54 +++--------- .../cudf/detail/offsets_iterator_factory.cuh | 87 +++++++++++++++++++ 3 files changed, 100 insertions(+), 43 deletions(-) create mode 100644 cpp/include/cudf/detail/offsets_iterator_factory.cuh diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index f964aabf8e5..b1ff0bbaea7 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -445,7 +445,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { size_type index = element_index + offset(); // account for this view's _offset char const* d_strings = d_children[strings_column_view::chars_column_index].data(); auto const offsets = d_children[strings_column_view::offsets_column_index]; - auto const itr = cudf::detail::input_offsetsalator(offsets.head(), offsets.type()); + auto const itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); auto const offset = itr[index]; return string_view{d_strings + offset, static_cast(itr[index + 1] - offset)}; } diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh index ee48a481e4d..7c3f9062c17 100644 --- a/cpp/include/cudf/detail/offsets_iterator.cuh +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,57 +22,27 @@ namespace cudf { namespace detail { /** - * @brief The index normalizing input iterator. + * @brief The offsets normalizing input iterator * - * This is an iterator that can be used for index types (integers) without - * requiring a type-specific instance. It can be used for any iterator - * interface for reading an array of integer values of type - * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. - * Reading specific elements always return a `size_type` integer. + * This is an iterator that can be used for offsets where the underlying + * type may be int32_t or int64_t. * - * Use the indexalator_factory to create an appropriate input iterator - * from a column_view. - * - * Example input iterator usage. - * @code - * auto begin = indexalator_factory::create_input_iterator(gather_map); - * auto end = begin + gather_map.size(); - * auto result = detail::gather( source, begin, end, IGNORE, stream, mr ); - * @endcode - * - * @code - * auto begin = indexalator_factory::create_input_iterator(indices); - * auto end = begin + indices.size(); - * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); - * @endcode + * Use the offsetalator_factory to create an appropriate input iterator + * from an offsets column_view. */ -using input_offsetsalator = input_normalator; +using input_offsetalator = input_normalator; /** - * @brief The index normalizing output iterator. + * @brief The offsets normalizing output iterator * - * This is an iterator that can be used for index types (integers) without - * requiring a type-specific instance. It can be used for any iterator - * interface for writing an array of integer values of type - * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. - * Setting specific elements always accept `size_type` integer values. + * This is an iterator that can be used for storing offsets values + * where the underlying type may be either int32_t or int64_t. * - * Use the indexalator_factory to create an appropriate output iterator + * Use the offsetalator_factory to create an appropriate output iterator * from a mutable_column_view. * - * Example output iterator usage. - * @code - * auto result_itr = indexalator_factory::create_output_iterator(indices->mutable_view()); - * thrust::lower_bound(rmm::exec_policy(stream), - * input->begin(), - * input->end(), - * values->begin(), - * values->end(), - * result_itr, - * thrust::less()); - * @endcode */ -using output_offsetsalator = output_normalator; +using output_offsetalator = output_normalator; } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/offsets_iterator_factory.cuh b/cpp/include/cudf/detail/offsets_iterator_factory.cuh new file mode 100644 index 00000000000..4baef35f3ae --- /dev/null +++ b/cpp/include/cudf/detail/offsets_iterator_factory.cuh @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cudf { +namespace detail { + +/** + * @brief Use this class to create an offsetalator instance. + */ +struct offsetalator_factory { + /** + * @brief A type_dispatcher functor to create an input iterator from an offsets column + */ + struct input_offsetalator_fn { + template or std::is_same_v>* = nullptr> + input_offsetalator operator()(column_view const& indices) + { + return input_offsetalator(indices.data(), indices.type()); + } + template and not std::is_same_v>* = + nullptr> + input_offsetalator operator()(Args&&... args) + { + CUDF_FAIL("offsets must be int32 or int64 type"); + } + }; + + /** + * @brief Create an input offsetalator instance from an offsets column + */ + static input_offsetalator make_input_iterator(column_view const& offsets) + { + return type_dispatcher(offsets.type(), input_offsetalator_fn{}, offsets); + } + + /** + * @brief A type_dispatcher functor to create an output iterator from an offsets column + */ + struct output_offsetalator_fn { + template or std::is_same_v>* = nullptr> + output_offsetalator operator()(mutable_column_view const& indices) + { + return output_offsetalator(indices.data(), indices.type()); + } + template and not std::is_same_v>* = + nullptr> + output_offsetalator operator()(Args&&... args) + { + CUDF_FAIL("offsets must be an index type"); + } + }; + + /** + * @brief Create an output offsetalator instance from an offsets column + */ + static output_offsetalator make_output_iterator(mutable_column_view const& offsets) + { + return type_dispatcher(offsets.type(), output_offsetalator_fn{}, offsets); + } +}; + +} // namespace detail +} // namespace cudf From eb586f4a1f1c16f09c5e71999a9a52543d6b0246 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 10 Oct 2023 19:57:45 -0400 Subject: [PATCH 09/20] use size_t for index_sizeof_fn --- cpp/include/cudf/detail/normalizing_iterator.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 55b6f566684..35a695d47df 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -205,7 +205,7 @@ struct base_normalator { private: struct integer_sizeof_fn { template ()>* = nullptr> - constexpr int operator()() const + CUDF_HOST_DEVICE constexpr std::size_t operator()() const { #ifndef __CUDA_ARCH__ CUDF_FAIL("only integral types are supported"); @@ -214,7 +214,7 @@ struct base_normalator { #endif } template ()>* = nullptr> - constexpr int operator()() const noexcept + CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept { return sizeof(T); } @@ -226,10 +226,10 @@ struct base_normalator { */ explicit CUDF_HOST_DEVICE base_normalator(data_type dtype) : dtype_(dtype) { - width_ = type_dispatcher(dtype, integer_sizeof_fn{}); + width_ = static_cast(type_dispatcher(dtype, integer_sizeof_fn{})); } - int width_; /// integer type width = 1,2,4, or 8 + int32_t width_; /// integer type width = 1,2,4, or 8 data_type dtype_; /// for type-dispatcher calls }; From 1add4022898213f63f8dd9c1fd59c0143e5aee1c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 24 Oct 2023 17:54:13 -0400 Subject: [PATCH 10/20] fix exception message --- cpp/include/cudf/detail/offsets_iterator_factory.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/offsets_iterator_factory.cuh b/cpp/include/cudf/detail/offsets_iterator_factory.cuh index 4baef35f3ae..0cfad6cf3ca 100644 --- a/cpp/include/cudf/detail/offsets_iterator_factory.cuh +++ b/cpp/include/cudf/detail/offsets_iterator_factory.cuh @@ -70,7 +70,7 @@ struct offsetalator_factory { nullptr> output_offsetalator operator()(Args&&... args) { - CUDF_FAIL("offsets must be an index type"); + CUDF_FAIL("offsets must be int32 or int64 type"); } }; From 704c853c8e1699f8fb6d315925ae8e18e6e41853 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 1 Nov 2023 14:26:46 -0400 Subject: [PATCH 11/20] rework offsetalator/indexalator dispatch logic --- cpp/include/cudf/detail/indexalator.cuh | 128 +++++++++++++- .../cudf/detail/normalizing_iterator.cuh | 156 +----------------- cpp/include/cudf/detail/offsets_iterator.cuh | 114 ++++++++++++- .../cudf/detail/offsets_iterator_factory.cuh | 4 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/iterator/offsetalator_test.cu | 137 +++++++++++++++ 6 files changed, 380 insertions(+), 160 deletions(-) create mode 100644 cpp/tests/iterator/offsetalator_test.cu diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 6532dae3695..a481d90456b 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -56,7 +56,64 @@ namespace detail { * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); * @endcode */ -using input_indexalator = input_normalator; +struct input_indexalator : base_normalator { + friend struct base_normalator; // for CRTP + + using reference = cudf::size_type const; // this keeps STL and thrust happy + + input_indexalator() = default; + input_indexalator(input_indexalator const&) = default; + input_indexalator(input_indexalator&&) = default; + input_indexalator& operator=(input_indexalator const&) = default; + input_indexalator& operator=(input_indexalator&&) = default; + + /** + * @brief Indirection operator returns the value at the current iterator position + */ + __device__ inline cudf::size_type operator*() const { return operator[](0); } + + /** + * @brief Dispatch functor for resolving a Integer value from any integer type + */ + struct normalize_type { + template ()>* = nullptr> + __device__ cudf::size_type operator()(void const* tp) + { + return static_cast(*static_cast(tp)); + } + template ()>* = nullptr> + __device__ cudf::size_type operator()(void const*) + { + CUDF_UNREACHABLE("only integral types are supported"); + } + }; + + /** + * @brief Array subscript operator returns a value at the input + * `idx` position as a `Integer` value. + */ + __device__ inline cudf::size_type operator[](size_type idx) const + { + void const* tp = p_ + (idx * this->width_); + return type_dispatcher(this->dtype_, normalize_type{}, tp); + } + + /** + * @brief Create an input index normalizing iterator. + * + * Use the indexalator_factory to create an iterator instance. + * + * @param data Pointer to an integer array in device memory. + * @param data_type Type of data in data + */ + CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0) + : base_normalator(dtype), p_{static_cast(data)} + { + p_ += offset * this->width_; + } + + char const* p_; /// pointer to the integer data in device memory +}; /** * @brief The index normalizing output iterator. @@ -82,7 +139,74 @@ using input_indexalator = input_normalator; * thrust::less()); * @endcode */ -using output_indexalator = output_normalator; +struct output_indexalator : base_normalator { + friend struct base_normalator; // for CRTP + + using reference = output_indexalator const&; // required for output iterators + + output_indexalator() = default; + output_indexalator(output_indexalator const&) = default; + output_indexalator(output_indexalator&&) = default; + output_indexalator& operator=(output_indexalator const&) = default; + output_indexalator& operator=(output_indexalator&&) = default; + + /** + * @brief Indirection operator returns this iterator instance in order + * to capture the `operator=(Integer)` calls. + */ + __device__ inline output_indexalator const& operator*() const { return *this; } + + /** + * @brief Array subscript operator returns an iterator instance at the specified `idx` position. + * + * This allows capturing the subsequent `operator=(Integer)` call in this class. + */ + __device__ inline output_indexalator const operator[](size_type idx) const + { + output_indexalator tmp{*this}; + tmp.p_ += (idx * this->width_); + return tmp; + } + + /** + * @brief Dispatch functor for setting the index value from a size_type value. + */ + struct normalize_type { + template ()>* = nullptr> + __device__ void operator()(void* tp, cudf::size_type const value) + { + (*static_cast(tp)) = static_cast(value); + } + template ()>* = nullptr> + __device__ void operator()(void*, cudf::size_type const) + { + CUDF_UNREACHABLE("only index types are supported"); + } + }; + + /** + * @brief Assign an Integer value to the current iterator position + */ + __device__ inline output_indexalator const& operator=(cudf::size_type const value) const + { + void* tp = p_; + type_dispatcher(this->dtype_, normalize_type{}, tp, value); + return *this; + } + + /** + * @brief Create an output normalizing iterator + * + * @param data Pointer to an integer array in device memory. + * @param data_type Type of data in data + */ + CUDF_HOST_DEVICE output_indexalator(void* data, data_type dtype) + : base_normalator(dtype), p_{static_cast(data)} + { + } + + char* p_; /// pointer to the integer data in device memory +}; /** * @brief Use this class to create an indexalator instance. diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 35a695d47df..9be3019cb38 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -204,7 +204,7 @@ struct base_normalator { private: struct integer_sizeof_fn { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE constexpr std::size_t operator()() const { #ifndef __CUDA_ARCH__ @@ -213,7 +213,7 @@ struct base_normalator { CUDF_UNREACHABLE("only integral types are supported"); #endif } - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept { return sizeof(T); @@ -233,157 +233,5 @@ struct base_normalator { data_type dtype_; /// for type-dispatcher calls }; -/** - * @brief The integer normalizing input iterator - * - * This is an iterator that can be used for index types (integers) without - * requiring a type-specific instance. It can be used for any iterator - * interface for reading an array of integer values of type - * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. - * Reading specific elements always return a type of `Integer` - * - * @tparam Integer Type returned by all read functions - */ -template -struct input_normalator : base_normalator, Integer> { - friend struct base_normalator, Integer>; // for CRTP - - using reference = Integer const; // this keeps STL and thrust happy - - input_normalator() = default; - input_normalator(input_normalator const&) = default; - input_normalator(input_normalator&&) = default; - input_normalator& operator=(input_normalator const&) = default; - input_normalator& operator=(input_normalator&&) = default; - - /** - * @brief Indirection operator returns the value at the current iterator position - */ - __device__ inline Integer operator*() const { return operator[](0); } - - /** - * @brief Dispatch functor for resolving a Integer value from any integer type - */ - struct normalize_type { - template ()>* = nullptr> - __device__ Integer operator()(void const* tp) - { - return static_cast(*static_cast(tp)); - } - template ()>* = nullptr> - __device__ Integer operator()(void const*) - { - CUDF_UNREACHABLE("only integral types are supported"); - } - }; - - /** - * @brief Array subscript operator returns a value at the input - * `idx` position as a `Integer` value. - */ - __device__ inline Integer operator[](size_type idx) const - { - void const* tp = p_ + (idx * this->width_); - return type_dispatcher(this->dtype_, normalize_type{}, tp); - } - - /** - * @brief Create an input index normalizing iterator. - * - * Use the indexalator_factory to create an iterator instance. - * - * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data - */ - CUDF_HOST_DEVICE input_normalator(void const* data, data_type dtype, cudf::size_type offset = 0) - : base_normalator, Integer>(dtype), p_{static_cast(data)} - { - p_ += offset * this->width_; - } - - char const* p_; /// pointer to the integer data in device memory -}; - -/** - * @brief The integer normalizing output iterator - * - * This is an iterator that can be used for index types (integers) without - * requiring a type-specific instance. It can be used for any iterator - * interface for writing an array of integer values of type - * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. - * Setting specific elements always accept the `Integer` type values. - * - * @tparam Integer The type used for all write functions - */ -template -struct output_normalator : base_normalator, Integer> { - friend struct base_normalator, Integer>; // for CRTP - - using reference = output_normalator const&; // required for output iterators - - output_normalator() = default; - output_normalator(output_normalator const&) = default; - output_normalator(output_normalator&&) = default; - output_normalator& operator=(output_normalator const&) = default; - output_normalator& operator=(output_normalator&&) = default; - - /** - * @brief Indirection operator returns this iterator instance in order - * to capture the `operator=(Integer)` calls. - */ - __device__ inline output_normalator const& operator*() const { return *this; } - - /** - * @brief Array subscript operator returns an iterator instance at the specified `idx` position. - * - * This allows capturing the subsequent `operator=(Integer)` call in this class. - */ - __device__ inline output_normalator const operator[](size_type idx) const - { - output_normalator tmp{*this}; - tmp.p_ += (idx * this->width_); - return tmp; - } - - /** - * @brief Dispatch functor for setting the index value from a size_type value. - */ - struct normalize_type { - template ()>* = nullptr> - __device__ void operator()(void* tp, Integer const value) - { - (*static_cast(tp)) = static_cast(value); - } - template ()>* = nullptr> - __device__ void operator()(void*, Integer const) - { - CUDF_UNREACHABLE("only index types are supported"); - } - }; - - /** - * @brief Assign an Integer value to the current iterator position - */ - __device__ inline output_normalator const& operator=(Integer const value) const - { - void* tp = p_; - type_dispatcher(this->dtype_, normalize_type{}, tp, value); - return *this; - } - - /** - * @brief Create an output normalizing iterator - * - * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data - */ - CUDF_HOST_DEVICE output_normalator(void* data, data_type dtype) - : base_normalator, Integer>(dtype), p_{static_cast(data)} - { - } - - char* p_; /// pointer to the integer data in device memory -}; - } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh index 7c3f9062c17..202964cdbe1 100644 --- a/cpp/include/cudf/detail/offsets_iterator.cuh +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -30,7 +30,55 @@ namespace detail { * Use the offsetalator_factory to create an appropriate input iterator * from an offsets column_view. */ -using input_offsetalator = input_normalator; +struct input_offsetalator : base_normalator { + friend struct base_normalator; // for CRTP + + using reference = int64_t const; // this keeps STL and thrust happy + + input_offsetalator() = default; + input_offsetalator(input_offsetalator const&) = default; + input_offsetalator(input_offsetalator&&) = default; + input_offsetalator& operator=(input_offsetalator const&) = default; + input_offsetalator& operator=(input_offsetalator&&) = default; + + /** + * @brief Indirection operator returns the value at the current iterator position + */ + __device__ inline int64_t operator*() const { return operator[](0); } + + /** + * @brief Array subscript operator returns a value at the input + * `idx` position as a int64_t value. + */ + __device__ inline int64_t operator[](size_type idx) const + { + void const* tp = p_ + (idx * this->width_); + return this->width_ == sizeof(int32_t) ? static_cast(*static_cast(tp)) + : *static_cast(tp); + } + + /** + * @brief Create an input index normalizing iterator. + * + * Use the indexalator_factory to create an iterator instance. + * + * @param data Pointer to an integer array in device memory. + * @param data_type Type of data in data + */ + CUDF_HOST_DEVICE input_offsetalator(void const* data, data_type dtype) + : base_normalator(dtype), p_{static_cast(data)} + { +#ifndef __CUDA_ARCH__ + CUDF_EXPECTS(dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64, + "Unexpected offsets type"); +#else + cudf_assert((dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64) && + "Unexpected offsets type"); +#endif + } + + char const* p_; /// pointer to the integer data in device memory +}; /** * @brief The offsets normalizing output iterator @@ -42,7 +90,69 @@ using input_offsetalator = input_normalator; * from a mutable_column_view. * */ -using output_offsetalator = output_normalator; +struct output_offsetalator : base_normalator { + friend struct base_normalator; // for CRTP + + using reference = output_offsetalator const&; // required for output iterators + + output_offsetalator() = default; + output_offsetalator(output_offsetalator const&) = default; + output_offsetalator(output_offsetalator&&) = default; + output_offsetalator& operator=(output_offsetalator const&) = default; + output_offsetalator& operator=(output_offsetalator&&) = default; + + /** + * @brief Indirection operator returns this iterator instance in order + * to capture the `operator=(int64)` calls. + */ + __device__ inline output_offsetalator const& operator*() const { return *this; } + + /** + * @brief Array subscript operator returns an iterator instance at the specified `idx` position. + * + * This allows capturing the subsequent `operator=(int64)` call in this class. + */ + __device__ inline output_offsetalator const operator[](size_type idx) const + { + output_offsetalator tmp{*this}; + tmp.p_ += (idx * this->width_); + return tmp; + } + + /** + * @brief Assign an offset value to the current iterator position + */ + __device__ inline output_offsetalator const& operator=(int64_t const value) const + { + void* tp = p_; + if (this->width_ == sizeof(int32_t)) { + (*static_cast(tp)) = static_cast(value); + } else { + (*static_cast(tp)) = value; + } + return *this; + } + + /** + * @brief Create an output offsets iterator + * + * @param data Pointer to an integer array in device memory. + * @param data_type Type of data in data + */ + CUDF_HOST_DEVICE output_offsetalator(void* data, data_type dtype) + : base_normalator(dtype), p_{static_cast(data)} + { +#ifndef __CUDA_ARCH__ + CUDF_EXPECTS(dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64, + "Unexpected offsets type"); +#else + cudf_assert((dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64) && + "Unexpected offsets type"); +#endif + } + + char* p_; /// pointer to the integer data in device memory +}; } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/offsets_iterator_factory.cuh b/cpp/include/cudf/detail/offsets_iterator_factory.cuh index 0cfad6cf3ca..03af35730be 100644 --- a/cpp/include/cudf/detail/offsets_iterator_factory.cuh +++ b/cpp/include/cudf/detail/offsets_iterator_factory.cuh @@ -34,7 +34,7 @@ struct offsetalator_factory { std::enable_if_t or std::is_same_v>* = nullptr> input_offsetalator operator()(column_view const& indices) { - return input_offsetalator(indices.data(), indices.type()); + return input_offsetalator(indices.data(), indices.type()); } template or std::is_same_v>* = nullptr> output_offsetalator operator()(mutable_column_view const& indices) { - return output_offsetalator(indices.data(), indices.type()); + return output_offsetalator(indices.data(), indices.type()); } template + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +using TestingTypes = cudf::test::Types; + +template +struct OffsetalatorTest : public IteratorTest {}; + +TYPED_TEST_SUITE(OffsetalatorTest, TestingTypes); + +TYPED_TEST(OffsetalatorTest, input_iterator) +{ + using T = TypeParam; + + auto host_values = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); + + auto d_col = cudf::test::fixed_width_column_wrapper(host_values.begin(), host_values.end()); + + auto expected_values = thrust::host_vector(host_values.size()); + std::transform(host_values.begin(), host_values.end(), expected_values.begin(), [](auto v) { + return static_cast(v); + }); + + auto it_dev = cudf::detail::offsetalator_factory::make_input_iterator(d_col); + this->iterator_test_thrust(expected_values, it_dev, host_values.size()); +} + +TYPED_TEST(OffsetalatorTest, output_iterator) +{ + using T = TypeParam; + + auto d_col1 = cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto itr = cudf::detail::offsetalator_factory::make_output_iterator(d_col2); + auto input = cudf::column_view(d_col1); + auto stream = cudf::get_default_stream(); + + auto map = cudf::test::fixed_width_column_wrapper({0, 2, 4, 6, 8, 1, 3, 5, 7}); + auto d_map = cudf::column_view(map); + thrust::gather(rmm::exec_policy_nosync(stream), + d_map.begin(), + d_map.end(), + input.begin(), + itr); + auto expected = cudf::test::fixed_width_column_wrapper({0, 7, 23, 43, 63, 6, 14, 33, 45}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::scatter(rmm::exec_policy_nosync(stream), + input.begin(), + input.end(), + d_map.begin(), + itr); + expected = cudf::test::fixed_width_column_wrapper({0, 33, 6, 43, 7, 45, 14, 63, 23}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::fill(rmm::exec_policy(stream), itr, itr + input.size(), 77); + expected = cudf::test::fixed_width_column_wrapper({77, 77, 77, 77, 77, 77, 77, 77, 77}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::sequence(rmm::exec_policy(stream), itr, itr + input.size()); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + auto offsets = + cudf::test::fixed_width_column_wrapper({0, 10, 20, 30, 40, 50, 60, 70, 80}); + auto d_offsets = cudf::column_view(offsets); + thrust::lower_bound(rmm::exec_policy(stream), + d_offsets.begin(), + d_offsets.end(), + input.begin(), + input.end(), + itr); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 1, 2, 3, 4, 5, 5, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} + +/** + * For testing creating and using the offsetalator in device code. + */ +struct device_functor_fn { + cudf::column_device_view const d_col; + __device__ int32_t operator()(int idx) + { + auto const itr = cudf::detail::input_offsetalator(d_col.head(), d_col.type()); + return static_cast(itr[idx] * 3); + } +}; + +TYPED_TEST(OffsetalatorTest, device_offsetalator) +{ + using T = TypeParam; + + auto d_col1 = cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto input = cudf::column_view(d_col1); + auto output = cudf::mutable_column_view(d_col2); + auto stream = cudf::get_default_stream(); + + auto d_input = cudf::column_device_view::create(input, stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + output.begin(), + device_functor_fn{*d_input}); + + auto expected = + cudf::test::fixed_width_column_wrapper({0, 18, 21, 42, 69, 99, 129, 135, 189}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} From 8ee15a3ba9bd0e24cc07a1466307ab814f689213 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 1 Nov 2023 20:31:45 -0400 Subject: [PATCH 12/20] add alignas --- cpp/include/cudf/detail/normalizing_iterator.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 9be3019cb38..eb4ade3d289 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -33,7 +33,7 @@ namespace detail { * @tparam Integer The type the iterator normalizes to */ template -struct base_normalator { +struct alignas(16) base_normalator { static_assert(cudf::is_index_type()); using difference_type = std::ptrdiff_t; using value_type = Integer; From a97d0614c7a94098e55f1fe5fde29c9f2a95ce29 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Nov 2023 04:47:21 -0400 Subject: [PATCH 13/20] add more alignases --- cpp/include/cudf/detail/indexalator.cuh | 4 ++-- cpp/tests/iterator/indexalator_test.cu | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index a481d90456b..62a26c793c5 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -56,7 +56,7 @@ namespace detail { * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); * @endcode */ -struct input_indexalator : base_normalator { +struct alignas(16) input_indexalator : base_normalator { friend struct base_normalator; // for CRTP using reference = cudf::size_type const; // this keeps STL and thrust happy @@ -139,7 +139,7 @@ struct input_indexalator : base_normalator { * thrust::less()); * @endcode */ -struct output_indexalator : base_normalator { +struct alignas(16) output_indexalator : base_normalator { friend struct base_normalator; // for CRTP using reference = output_indexalator const&; // required for output iterators diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 3e8bcd5cb0d..deb9ff5ff0e 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -161,7 +161,7 @@ TYPED_TEST(IndexalatorTest, output_iterator) /** * For testing creating and using the indexalator in device code. */ -struct device_functor_fn { +struct alignas(16) device_functor_fn { cudf::column_device_view const d_col; __device__ cudf::size_type operator()(cudf::size_type idx) { @@ -170,7 +170,7 @@ struct device_functor_fn { } }; -TYPED_TEST(IndexalatorTest, device_indexalator) +TYPED_TEST(IndexalatorTest, DISABLED_device_indexalator) { using T = TypeParam; From 1ed5e29d2232ba93394b2993789c3b60e646ed81 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Nov 2023 10:06:18 -0400 Subject: [PATCH 14/20] re-enable device test --- cpp/tests/iterator/indexalator_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index deb9ff5ff0e..1b198e4446b 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -170,7 +170,7 @@ struct alignas(16) device_functor_fn { } }; -TYPED_TEST(IndexalatorTest, DISABLED_device_indexalator) +TYPED_TEST(IndexalatorTest, device_indexalator) { using T = TypeParam; From 5839fcdc68b4da5ef85c6b191ad15ac8603e0f06 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Nov 2023 11:20:44 -0400 Subject: [PATCH 15/20] remove unneeded test --- cpp/tests/iterator/indexalator_test.cu | 37 -------------------------- 1 file changed, 37 deletions(-) diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 1b198e4446b..0c10853ec02 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -157,40 +157,3 @@ TYPED_TEST(IndexalatorTest, output_iterator) expected = cudf::test::fixed_width_column_wrapper({0, 1, 1, 2, 3, 4, 5, 5, 7}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); } - -/** - * For testing creating and using the indexalator in device code. - */ -struct alignas(16) device_functor_fn { - cudf::column_device_view const d_col; - __device__ cudf::size_type operator()(cudf::size_type idx) - { - auto itr = cudf::detail::input_indexalator(d_col.head(), d_col.type()); - return itr[idx] * 3; - } -}; - -TYPED_TEST(IndexalatorTest, device_indexalator) -{ - using T = TypeParam; - - auto d_col1 = - cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); - auto d_col2 = - cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); - auto input = cudf::column_view(d_col1); - auto output = cudf::mutable_column_view(d_col2); - auto stream = cudf::get_default_stream(); - - auto d_input = cudf::column_device_view::create(input, stream); - - thrust::transform(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(input.size()), - output.begin(), - device_functor_fn{*d_input}); - - auto expected = - cudf::test::fixed_width_column_wrapper({0, 18, 21, 42, 69, 99, 129, 135, 189}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); -} From e0d4e5f2c8c918fda19bae10e15fcf0eeb71a9ff Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Nov 2023 13:43:41 -0400 Subject: [PATCH 16/20] change std::enable_if_t to CUDF_ENABLE_IF --- cpp/include/cudf/detail/offsets_iterator_factory.cuh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/offsets_iterator_factory.cuh b/cpp/include/cudf/detail/offsets_iterator_factory.cuh index 03af35730be..65639d41543 100644 --- a/cpp/include/cudf/detail/offsets_iterator_factory.cuh +++ b/cpp/include/cudf/detail/offsets_iterator_factory.cuh @@ -30,16 +30,14 @@ struct offsetalator_factory { * @brief A type_dispatcher functor to create an input iterator from an offsets column */ struct input_offsetalator_fn { - template or std::is_same_v>* = nullptr> + template or std::is_same_v)> input_offsetalator operator()(column_view const& indices) { return input_offsetalator(indices.data(), indices.type()); } template and not std::is_same_v>* = - nullptr> + CUDF_ENABLE_IF(not std::is_same_v and not std::is_same_v)> input_offsetalator operator()(Args&&... args) { CUDF_FAIL("offsets must be int32 or int64 type"); @@ -58,16 +56,14 @@ struct offsetalator_factory { * @brief A type_dispatcher functor to create an output iterator from an offsets column */ struct output_offsetalator_fn { - template or std::is_same_v>* = nullptr> + template or std::is_same_v)> output_offsetalator operator()(mutable_column_view const& indices) { return output_offsetalator(indices.data(), indices.type()); } template and not std::is_same_v>* = - nullptr> + CUDF_ENABLE_IF(not std::is_same_v and not std::is_same_v)> output_offsetalator operator()(Args&&... args) { CUDF_FAIL("offsets must be int32 or int64 type"); From 6ec6258beb90a8cd76db48179322c6c9b48e8048 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 3 Nov 2023 13:38:05 -0400 Subject: [PATCH 17/20] add anonymous namespace around internal functor --- cpp/tests/iterator/offsetalator_test.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/tests/iterator/offsetalator_test.cu b/cpp/tests/iterator/offsetalator_test.cu index aa1fd3f29ff..7d13a38697a 100644 --- a/cpp/tests/iterator/offsetalator_test.cu +++ b/cpp/tests/iterator/offsetalator_test.cu @@ -101,6 +101,7 @@ TYPED_TEST(OffsetalatorTest, output_iterator) CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); } +namespace { /** * For testing creating and using the offsetalator in device code. */ @@ -112,6 +113,7 @@ struct device_functor_fn { return static_cast(itr[idx] * 3); } }; +} // namespace TYPED_TEST(OffsetalatorTest, device_offsetalator) { From 3a21fe17aecd05e56251887978ab268b566783ee Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 6 Nov 2023 14:56:22 -0500 Subject: [PATCH 18/20] remove type-dispatcher call from ctor --- cpp/include/cudf/detail/indexalator.cuh | 32 ++++++++----------- .../cudf/detail/normalizing_iterator.cuh | 12 +++++-- cpp/include/cudf/detail/offsets_iterator.cuh | 6 +++- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 62a26c793c5..e8f8161dc78 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -56,7 +56,7 @@ namespace detail { * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); * @endcode */ -struct alignas(16) input_indexalator : base_normalator { +struct input_indexalator : base_normalator { friend struct base_normalator; // for CRTP using reference = cudf::size_type const; // this keeps STL and thrust happy @@ -76,12 +76,12 @@ struct alignas(16) input_indexalator : base_normalator()>* = nullptr> + template ())> __device__ cudf::size_type operator()(void const* tp) { return static_cast(*static_cast(tp)); } - template ()>* = nullptr> + template ())> __device__ cudf::size_type operator()(void const*) { CUDF_UNREACHABLE("only integral types are supported"); @@ -112,6 +112,7 @@ struct alignas(16) input_indexalator : base_normalatorwidth_; } + protected: char const* p_; /// pointer to the integer data in device memory }; @@ -139,7 +140,7 @@ struct alignas(16) input_indexalator : base_normalator()); * @endcode */ -struct alignas(16) output_indexalator : base_normalator { +struct output_indexalator : base_normalator { friend struct base_normalator; // for CRTP using reference = output_indexalator const&; // required for output iterators @@ -172,12 +173,12 @@ struct alignas(16) output_indexalator : base_normalator()>* = nullptr> + template ())> __device__ void operator()(void* tp, cudf::size_type const value) { (*static_cast(tp)) = static_cast(value); } - template ()>* = nullptr> + template ())> __device__ void operator()(void*, cudf::size_type const) { CUDF_UNREACHABLE("only index types are supported"); @@ -205,6 +206,7 @@ struct alignas(16) output_indexalator : base_normalator()>* = nullptr> + template ())> input_indexalator operator()(column_view const& indices) { return input_indexalator(indices.data(), indices.type()); } - template ()>* = nullptr> + template ())> input_indexalator operator()(Args&&... args) { CUDF_FAIL("indices must be an index type"); @@ -234,16 +234,14 @@ struct indexalator_factory { * @brief Use this class to create an indexalator to a scalar index. */ struct input_indexalator_scalar_fn { - template ()>* = nullptr> + template ())> input_indexalator operator()(scalar const& index) { // note: using static_cast const&>(index) creates a copy auto const scalar_impl = static_cast const*>(&index); return input_indexalator(scalar_impl->data(), index.type()); } - template ()>* = nullptr> + template ())> input_indexalator operator()(Args&&... args) { CUDF_FAIL("scalar must be an index type"); @@ -254,14 +252,12 @@ struct indexalator_factory { * @brief A type_dispatcher functor to create an output iterator from an indices column. */ struct output_indexalator_fn { - template ()>* = nullptr> + template ())> output_indexalator operator()(mutable_column_view const& indices) { return output_indexalator(indices.data(), indices.type()); } - template ()>* = nullptr> + template ())> output_indexalator operator()(Args&&... args) { CUDF_FAIL("indices must be an index type"); diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index eb4ade3d289..8f90afc3e57 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -204,7 +204,7 @@ struct alignas(16) base_normalator { private: struct integer_sizeof_fn { - template ()>* = nullptr> + template ())> CUDF_HOST_DEVICE constexpr std::size_t operator()() const { #ifndef __CUDA_ARCH__ @@ -213,7 +213,7 @@ struct alignas(16) base_normalator { CUDF_UNREACHABLE("only integral types are supported"); #endif } - template ()>* = nullptr> + template ())> CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept { return sizeof(T); @@ -229,6 +229,14 @@ struct alignas(16) base_normalator { width_ = static_cast(type_dispatcher(dtype, integer_sizeof_fn{})); } + /** + * @brief Constructor assigns width and type member variables for base class. + */ + explicit CUDF_HOST_DEVICE base_normalator(data_type dtype, int32_t width) + : width_(width), dtype_(dtype) + { + } + int32_t width_; /// integer type width = 1,2,4, or 8 data_type dtype_; /// for type-dispatcher calls }; diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh index 202964cdbe1..15a11826a50 100644 --- a/cpp/include/cudf/detail/offsets_iterator.cuh +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -66,7 +66,9 @@ struct input_offsetalator : base_normalator { * @param data_type Type of data in data */ CUDF_HOST_DEVICE input_offsetalator(void const* data, data_type dtype) - : base_normalator(dtype), p_{static_cast(data)} + : base_normalator( + dtype, dtype.id() == type_id::INT32 ? sizeof(int32_t) : sizeof(int64_t)), + p_{static_cast(data)} { #ifndef __CUDA_ARCH__ CUDF_EXPECTS(dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64, @@ -77,6 +79,7 @@ struct input_offsetalator : base_normalator { #endif } + protected: char const* p_; /// pointer to the integer data in device memory }; @@ -151,6 +154,7 @@ struct output_offsetalator : base_normalator { #endif } + protected: char* p_; /// pointer to the integer data in device memory }; From f4634f94ed4f61250515be35c2433b7470fb7058 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 7 Nov 2023 19:13:07 -0500 Subject: [PATCH 19/20] removed unneeded dispatch in factory --- cpp/include/cudf/detail/indexalator.cuh | 4 +- cpp/include/cudf/detail/offsets_iterator.cuh | 5 ++- .../cudf/detail/offsets_iterator_factory.cuh | 42 ++----------------- cpp/tests/iterator/offsetalator_test.cu | 1 + 4 files changed, 10 insertions(+), 42 deletions(-) diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index e8f8161dc78..8e819563e2f 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -155,7 +155,7 @@ struct output_indexalator : base_normalator * @brief Indirection operator returns this iterator instance in order * to capture the `operator=(Integer)` calls. */ - __device__ inline output_indexalator const& operator*() const { return *this; } + __device__ inline reference operator*() const { return *this; } /** * @brief Array subscript operator returns an iterator instance at the specified `idx` position. @@ -188,7 +188,7 @@ struct output_indexalator : base_normalator /** * @brief Assign an Integer value to the current iterator position */ - __device__ inline output_indexalator const& operator=(cudf::size_type const value) const + __device__ inline reference operator=(cudf::size_type const value) const { void* tp = p_; type_dispatcher(this->dtype_, normalize_type{}, tp, value); diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh index 15a11826a50..e05f86f9158 100644 --- a/cpp/include/cudf/detail/offsets_iterator.cuh +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include namespace cudf { namespace detail { @@ -143,7 +144,9 @@ struct output_offsetalator : base_normalator { * @param data_type Type of data in data */ CUDF_HOST_DEVICE output_offsetalator(void* data, data_type dtype) - : base_normalator(dtype), p_{static_cast(data)} + : base_normalator( + dtype, dtype.id() == type_id::INT32 ? sizeof(int32_t) : sizeof(int64_t)), + p_{static_cast(data)} { #ifndef __CUDA_ARCH__ CUDF_EXPECTS(dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64, diff --git a/cpp/include/cudf/detail/offsets_iterator_factory.cuh b/cpp/include/cudf/detail/offsets_iterator_factory.cuh index 65639d41543..5b4c6b825d2 100644 --- a/cpp/include/cudf/detail/offsets_iterator_factory.cuh +++ b/cpp/include/cudf/detail/offsets_iterator_factory.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include namespace cudf { @@ -26,56 +26,20 @@ namespace detail { * @brief Use this class to create an offsetalator instance. */ struct offsetalator_factory { - /** - * @brief A type_dispatcher functor to create an input iterator from an offsets column - */ - struct input_offsetalator_fn { - template or std::is_same_v)> - input_offsetalator operator()(column_view const& indices) - { - return input_offsetalator(indices.data(), indices.type()); - } - template and not std::is_same_v)> - input_offsetalator operator()(Args&&... args) - { - CUDF_FAIL("offsets must be int32 or int64 type"); - } - }; - /** * @brief Create an input offsetalator instance from an offsets column */ static input_offsetalator make_input_iterator(column_view const& offsets) { - return type_dispatcher(offsets.type(), input_offsetalator_fn{}, offsets); + return input_offsetalator(offsets.head(), offsets.type()); } - /** - * @brief A type_dispatcher functor to create an output iterator from an offsets column - */ - struct output_offsetalator_fn { - template or std::is_same_v)> - output_offsetalator operator()(mutable_column_view const& indices) - { - return output_offsetalator(indices.data(), indices.type()); - } - template and not std::is_same_v)> - output_offsetalator operator()(Args&&... args) - { - CUDF_FAIL("offsets must be int32 or int64 type"); - } - }; - /** * @brief Create an output offsetalator instance from an offsets column */ static output_offsetalator make_output_iterator(mutable_column_view const& offsets) { - return type_dispatcher(offsets.type(), output_offsetalator_fn{}, offsets); + return output_offsetalator(offsets.head(), offsets.type()); } }; diff --git a/cpp/tests/iterator/offsetalator_test.cu b/cpp/tests/iterator/offsetalator_test.cu index 7d13a38697a..e569e58f42a 100644 --- a/cpp/tests/iterator/offsetalator_test.cu +++ b/cpp/tests/iterator/offsetalator_test.cu @@ -27,6 +27,7 @@ #include #include #include +#include using TestingTypes = cudf::test::Types; From 56a73c5602a0e2bd2a8dba80bc55758c1ab5fe75 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 8 Nov 2023 15:45:01 -0500 Subject: [PATCH 20/20] fix doxygen comments --- cpp/include/cudf/detail/indexalator.cuh | 11 ++++++----- cpp/include/cudf/detail/offsets_iterator.cuh | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 8e819563e2f..4d261c54b29 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -99,12 +99,13 @@ struct input_indexalator : base_normalator { } /** - * @brief Create an input index normalizing iterator. + * @brief Create an input index normalizing iterator * * Use the indexalator_factory to create an iterator instance. * - * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data + * @param data Pointer to an integer array in device memory. + * @param dtype Type of data in data + * @param offset Applied to the data pointer per size of the type */ CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0) : base_normalator(dtype), p_{static_cast(data)} @@ -117,7 +118,7 @@ struct input_indexalator : base_normalator { }; /** - * @brief The index normalizing output iterator. + * @brief The index normalizing output iterator * * This is an iterator that can be used for index types (integers) without * requiring a type-specific instance. It can be used for any iterator @@ -199,7 +200,7 @@ struct output_indexalator : base_normalator * @brief Create an output normalizing iterator * * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data + * @param dtype Type of data in data */ CUDF_HOST_DEVICE output_indexalator(void* data, data_type dtype) : base_normalator(dtype), p_{static_cast(data)} diff --git a/cpp/include/cudf/detail/offsets_iterator.cuh b/cpp/include/cudf/detail/offsets_iterator.cuh index e05f86f9158..3eb77b32353 100644 --- a/cpp/include/cudf/detail/offsets_iterator.cuh +++ b/cpp/include/cudf/detail/offsets_iterator.cuh @@ -64,7 +64,7 @@ struct input_offsetalator : base_normalator { * Use the indexalator_factory to create an iterator instance. * * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data + * @param dtype Type of data in data */ CUDF_HOST_DEVICE input_offsetalator(void const* data, data_type dtype) : base_normalator( @@ -141,7 +141,7 @@ struct output_offsetalator : base_normalator { * @brief Create an output offsets iterator * * @param data Pointer to an integer array in device memory. - * @param data_type Type of data in data + * @param dtype Type of data in data */ CUDF_HOST_DEVICE output_offsetalator(void* data, data_type dtype) : base_normalator(