From 9af26dc3056ab6ad8a8bd3f1e7b6123104f70a6e Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:20:04 -0700 Subject: [PATCH 1/5] adds tabulate output iterator --- thrust/testing/tabulate_output_iterator.cu | 138 ++++++++++++++++++ .../detail/tabulate_output_iterator.inl | 72 +++++++++ .../iterator/tabulate_output_iterator.h | 116 +++++++++++++++ 3 files changed, 326 insertions(+) create mode 100644 thrust/testing/tabulate_output_iterator.cu create mode 100644 thrust/thrust/iterator/detail/tabulate_output_iterator.inl create mode 100644 thrust/thrust/iterator/tabulate_output_iterator.h diff --git a/thrust/testing/tabulate_output_iterator.cu b/thrust/testing/tabulate_output_iterator.cu new file mode 100644 index 00000000000..766e47caeb7 --- /dev/null +++ b/thrust/testing/tabulate_output_iterator.cu @@ -0,0 +1,138 @@ +#include <thrust/copy.h> +#include <thrust/device_vector.h> +#include <thrust/functional.h> +#include <thrust/gather.h> +#include <thrust/host_vector.h> +#include <thrust/iterator/counting_iterator.h> +#include <thrust/iterator/tabulate_output_iterator.h> +#include <thrust/iterator/transform_iterator.h> +#include <thrust/iterator/zip_iterator.h> +#include <thrust/reduce.h> +#include <thrust/sequence.h> + +#include <cuda/std/type_traits> + +#include <unittest/unittest.h> + +template <typename OutItT> +struct host_write_op +{ + OutItT out; + + template <typename IndexT, typename T> + __host__ __forceinline__ void operator()(IndexT index, T val) + { + // val is a thrust::tuple(value, input_index). Only write out the value part. + out[index] = thrust::get<0>(val); + } +}; + +template <typename OutItT> +struct device_write_op +{ + OutItT out; + + template <typename IndexT, typename T> + __device__ __forceinline__ void operator()(IndexT index, T val) + { + // val is a thrust::tuple(value, input_index). Only write out the value part. + out[index] = thrust::get<0>(val); + } +}; + +struct select_op +{ + std::size_t select_every_nth; + + template <typename T, typename IndexT> + __device__ __host__ __forceinline__ bool operator()(thrust::tuple<T, IndexT> key_index_pair) + { + // Select every n-th item + return (thrust::get<1>(key_index_pair) % select_every_nth == 0); + } +}; + +struct index_to_gather_index_op +{ + std::size_t gather_stride; + + template <typename IndexT> + __device__ __host__ __forceinline__ IndexT operator()(IndexT index) + { + // Gather the i-th output item from input[i*3] + return index * static_cast<IndexT>(gather_stride); + } +}; + +template <class Vector> +void TestTabulateOutputIterator() +{ + using T = typename Vector::value_type; + using it_t = typename Vector::iterator; + using space = typename thrust::iterator_system<typename Vector::iterator>::type; + + static constexpr std::size_t num_items = 240; + Vector input(num_items); + Vector output(num_items, T{42}); + + // Use operator type that supports the targeted system + using op_t = typename ::cuda::std::conditional<(::cuda::std::is_same<space, thrust::host_system_tag>::value), + host_write_op<it_t>, + device_write_op<it_t>>::type; + + // Construct tabulate_output_iterator + op_t op{output.begin()}; + auto tabulate_out_it = thrust::make_tabulate_output_iterator(op); + + // Prepare input + thrust::sequence(input.begin(), input.end(), 1); + auto iota_it = thrust::make_counting_iterator(0); + auto zipped_in = thrust::make_zip_iterator(input.begin(), iota_it); + + // Run copy_if using tabulate_output_iterator as the output iterator + static constexpr std::size_t select_every_nth = 3; + auto selected_it_end = + thrust::copy_if(zipped_in, zipped_in + num_items, tabulate_out_it, select_op{select_every_nth}); + const auto num_selected = static_cast<std::size_t>(thrust::distance(tabulate_out_it, selected_it_end)); + + // Prepare expected data + Vector expected_output(num_items, T{42}); + const std::size_t expected_num_selected = (num_items + select_every_nth - 1) / select_every_nth; + auto gather_index_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), index_to_gather_index_op{select_every_nth}); + thrust::gather(gather_index_it, gather_index_it + expected_num_selected, input.cbegin(), expected_output.begin()); + + ASSERT_EQUAL(expected_num_selected, num_selected); + ASSERT_EQUAL(output, expected_output); +} +DECLARE_VECTOR_UNITTEST(TestTabulateOutputIterator); + +void TestTabulateOutputIterator() +{ + using vector_t = thrust::host_vector<int>; + using vec_it_t = typename vector_t::iterator; + using op_t = host_write_op<vec_it_t>; + + vector_t out(4, 42); + thrust::tabulate_output_iterator<op_t> tabulate_out_it{op_t{out.begin()}}; + + tabulate_out_it[1] = 2; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 2); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 42); + + tabulate_out_it[3] = 0; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 2); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 0); + + tabulate_out_it[1] = 4; + ASSERT_EQUAL(out[0], 42); + ASSERT_EQUAL(out[1], 4); + ASSERT_EQUAL(out[2], 42); + ASSERT_EQUAL(out[3], 0); +} + +DECLARE_UNITTEST(TestTabulateOutputIterator); diff --git a/thrust/thrust/iterator/detail/tabulate_output_iterator.inl b/thrust/thrust/iterator/detail/tabulate_output_iterator.inl new file mode 100644 index 00000000000..f473abf5636 --- /dev/null +++ b/thrust/thrust/iterator/detail/tabulate_output_iterator.inl @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include <thrust/detail/config.h> + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include <thrust/iterator/counting_iterator.h> +#include <thrust/iterator/iterator_adaptor.h> +#include <thrust/iterator/tabulate_output_iterator.h> + +THRUST_NAMESPACE_BEGIN + +template <typename BinaryFunction, typename System, typename DifferenceT> +class tabulate_output_iterator; + +namespace detail +{ + +// Proxy reference that invokes a BinaryFunction with the index of the dereferenced iterator and the assigned value +template <typename BinaryFunction, typename DifferenceT> +class tabulate_output_iterator_proxy +{ +public: + _CCCL_HOST_DEVICE tabulate_output_iterator_proxy(BinaryFunction fun, DifferenceT index) + : fun(fun) + , index(index) + {} + + _CCCL_EXEC_CHECK_DISABLE + template <typename T> + _CCCL_HOST_DEVICE tabulate_output_iterator_proxy operator=(const T& x) + { + fun(index, x); + return *this; + } + +private: + BinaryFunction fun; + DifferenceT index; +}; + +// Compute the iterator_adaptor instantiation to be used for tabulate_output_iterator +template <typename BinaryFunction, typename System, typename DifferenceT> +struct tabulate_output_iterator_base +{ + using type = + thrust::iterator_adaptor<tabulate_output_iterator<BinaryFunction, System, DifferenceT>, + counting_iterator<DifferenceT>, + thrust::use_default, + System, + thrust::use_default, + tabulate_output_iterator_proxy<BinaryFunction, DifferenceT>>; +}; + +// Register tabulate_output_iterator_proxy with 'is_proxy_reference' from +// type_traits to enable its use with algorithms. +template <class BinaryFunction, class OutputIterator> +struct is_proxy_reference<tabulate_output_iterator_proxy<BinaryFunction, OutputIterator>> + : public thrust::detail::true_type +{}; + +} // namespace detail +THRUST_NAMESPACE_END diff --git a/thrust/thrust/iterator/tabulate_output_iterator.h b/thrust/thrust/iterator/tabulate_output_iterator.h new file mode 100644 index 00000000000..e290489e172 --- /dev/null +++ b/thrust/thrust/iterator/tabulate_output_iterator.h @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include <thrust/detail/config.h> + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header +#include <thrust/iterator/detail/tabulate_output_iterator.inl> + +THRUST_NAMESPACE_BEGIN + +/*! \addtogroup iterators + * \{ + */ + +/*! \addtogroup fancyiterator Fancy Iterators + * \ingroup iterators + * \{ + */ + +/*! \p tabulate_output_iterator is a special kind of output iterator which, whenever a value is assigned to a + * dereferenced iterator, calls the given callable with the index of the dereferenced iterator and the the assigned + * value. + * + * The following code snippet demonstrated how to create a \p tabulate_output_iterator which prints the index and the + * assigned value. + * + * \code + * #include <thrust/iterator/tabulate_output_iterator.h> + * + * // note: functor inherits form binary function + * struct print_op : public thrust::binary_function<int, float, void> + * { + * __host__ __device__ + * void operator()(int index, float value) const + * { + * printf("%d: %f\n", index, value); + * } + * }; + * + * int main() + * { + * auto tabulate_it = thrust::make_tabulate_output_iterator(print_op{}); + * + * tabulate_it[0] = 1.0f; // prints: 0: 1.0 + * tabulate_it[1] = 3.0f; // prints: 1: 3.0 + * tabulate_it[9] = 5.0f; // prints: 9: 5.0 + * } + * \endcode + * + * \see make_tabulate_output_iterator + */ + +template <typename BinaryFunction, typename System = use_default, typename DifferenceT = ptrdiff_t> +class tabulate_output_iterator : public detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>::type +{ + /*! \cond + */ + +public: + using super_t = typename detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>::type; + + friend class thrust::iterator_core_access; + /*! \endcond + */ + + tabulate_output_iterator() = default; + + /*! This constructor takes as argument a \c BinaryFunction and copies it to a new \p tabulate_output_iterator + * + * \param fun A \c BinaryFunction called whenever a value is assigned to this \p tabulate_output_iterator. + */ + _CCCL_HOST_DEVICE tabulate_output_iterator(BinaryFunction fun) + : fun(fun) + {} + + /*! \cond + */ + +private: + _CCCL_HOST_DEVICE typename super_t::reference dereference() const + { + return detail::tabulate_output_iterator_proxy<BinaryFunction, DifferenceT>(fun, *this->base()); + } + + BinaryFunction fun; + + /*! \endcond + */ +}; // end tabulate_output_iterator + +/*! \p make_tabulate_output_iterator creates a \p tabulate_output_iterator from a \c BinaryFunction. + * + * \param fun The \c BinaryFunction invoked whenever assigning to a dereferenced \p tabulate_output_iterator + * \see tabulate_output_iterator + */ +template <typename BinaryFunction> +tabulate_output_iterator<BinaryFunction> _CCCL_HOST_DEVICE make_tabulate_output_iterator(BinaryFunction fun) +{ + return tabulate_output_iterator<BinaryFunction>(fun); +} // end make_tabulate_output_iterator + +/*! \} // end fancyiterators + */ + +/*! \} // end iterators + */ + +THRUST_NAMESPACE_END From 5b3c411165c785a04013f26b8d55865d695ffab7 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 22 Aug 2024 22:51:27 -0700 Subject: [PATCH 2/5] uses cccl exec space macros --- thrust/testing/tabulate_output_iterator.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/thrust/testing/tabulate_output_iterator.cu b/thrust/testing/tabulate_output_iterator.cu index 766e47caeb7..8f76acf208c 100644 --- a/thrust/testing/tabulate_output_iterator.cu +++ b/thrust/testing/tabulate_output_iterator.cu @@ -20,7 +20,7 @@ struct host_write_op OutItT out; template <typename IndexT, typename T> - __host__ __forceinline__ void operator()(IndexT index, T val) + _CCCL_HOST void operator()(IndexT index, T val) { // val is a thrust::tuple(value, input_index). Only write out the value part. out[index] = thrust::get<0>(val); @@ -33,7 +33,7 @@ struct device_write_op OutItT out; template <typename IndexT, typename T> - __device__ __forceinline__ void operator()(IndexT index, T val) + _CCCL_DEVICE void operator()(IndexT index, T val) { // val is a thrust::tuple(value, input_index). Only write out the value part. out[index] = thrust::get<0>(val); @@ -45,7 +45,7 @@ struct select_op std::size_t select_every_nth; template <typename T, typename IndexT> - __device__ __host__ __forceinline__ bool operator()(thrust::tuple<T, IndexT> key_index_pair) + _CCCL_HOST_DEVICE bool operator()(thrust::tuple<T, IndexT> key_index_pair) { // Select every n-th item return (thrust::get<1>(key_index_pair) % select_every_nth == 0); @@ -57,7 +57,7 @@ struct index_to_gather_index_op std::size_t gather_stride; template <typename IndexT> - __device__ __host__ __forceinline__ IndexT operator()(IndexT index) + _CCCL_HOST_DEVICE IndexT operator()(IndexT index) { // Gather the i-th output item from input[i*3] return index * static_cast<IndexT>(gather_stride); From 1b2bd98e500871b90c4d94841793e73062f24797 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:35:28 -0700 Subject: [PATCH 3/5] addresses review comments --- thrust/testing/tabulate_output_iterator.cu | 18 +++++++++++++++--- .../thrust/iterator/tabulate_output_iterator.h | 5 +++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/thrust/testing/tabulate_output_iterator.cu b/thrust/testing/tabulate_output_iterator.cu index 8f76acf208c..789ed6cf04e 100644 --- a/thrust/testing/tabulate_output_iterator.cu +++ b/thrust/testing/tabulate_output_iterator.cu @@ -19,6 +19,18 @@ struct host_write_op { OutItT out; + template <typename IndexT, typename T> + _CCCL_HOST void operator()(IndexT index, T val) + { + out[index] = val; + } +}; + +template <typename OutItT> +struct host_write_first_op +{ + OutItT out; + template <typename IndexT, typename T> _CCCL_HOST void operator()(IndexT index, T val) { @@ -28,7 +40,7 @@ struct host_write_op }; template <typename OutItT> -struct device_write_op +struct device_write_first_op { OutItT out; @@ -77,8 +89,8 @@ void TestTabulateOutputIterator() // Use operator type that supports the targeted system using op_t = typename ::cuda::std::conditional<(::cuda::std::is_same<space, thrust::host_system_tag>::value), - host_write_op<it_t>, - device_write_op<it_t>>::type; + host_write_first_op<it_t>, + device_write_first_op<it_t>>::type; // Construct tabulate_output_iterator op_t op{output.begin()}; diff --git a/thrust/thrust/iterator/tabulate_output_iterator.h b/thrust/thrust/iterator/tabulate_output_iterator.h index e290489e172..8dd26cec79b 100644 --- a/thrust/thrust/iterator/tabulate_output_iterator.h +++ b/thrust/thrust/iterator/tabulate_output_iterator.h @@ -12,6 +12,7 @@ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) # pragma system_header #endif // no system header + #include <thrust/iterator/detail/tabulate_output_iterator.inl> THRUST_NAMESPACE_BEGIN @@ -26,8 +27,8 @@ THRUST_NAMESPACE_BEGIN */ /*! \p tabulate_output_iterator is a special kind of output iterator which, whenever a value is assigned to a - * dereferenced iterator, calls the given callable with the index of the dereferenced iterator and the the assigned - * value. + * dereferenced iterator, calls the given callable with the index that corresponds to the offset of the dereferenced + * iterator and the the assigned value. * * The following code snippet demonstrated how to create a \p tabulate_output_iterator which prints the index and the * assigned value. From 34f7b538e5d2bb1b30aa7c2b72815897eae2e3cc Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 23 Aug 2024 06:45:31 -0700 Subject: [PATCH 4/5] fixes documentation and example --- thrust/thrust/iterator/tabulate_output_iterator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thrust/thrust/iterator/tabulate_output_iterator.h b/thrust/thrust/iterator/tabulate_output_iterator.h index 8dd26cec79b..2dcfc5efcd7 100644 --- a/thrust/thrust/iterator/tabulate_output_iterator.h +++ b/thrust/thrust/iterator/tabulate_output_iterator.h @@ -28,7 +28,7 @@ THRUST_NAMESPACE_BEGIN /*! \p tabulate_output_iterator is a special kind of output iterator which, whenever a value is assigned to a * dereferenced iterator, calls the given callable with the index that corresponds to the offset of the dereferenced - * iterator and the the assigned value. + * iterator and the assigned value. * * The following code snippet demonstrated how to create a \p tabulate_output_iterator which prints the index and the * assigned value. @@ -37,7 +37,7 @@ THRUST_NAMESPACE_BEGIN * #include <thrust/iterator/tabulate_output_iterator.h> * * // note: functor inherits form binary function - * struct print_op : public thrust::binary_function<int, float, void> + * struct print_op * { * __host__ __device__ * void operator()(int index, float value) const From e7270b0e808d35a900cc1580bd0251018fa4e3d6 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 23 Aug 2024 06:56:04 -0700 Subject: [PATCH 5/5] moves to using alias template instead of member type --- thrust/thrust/iterator/detail/tabulate_output_iterator.inl | 7 ++----- thrust/thrust/iterator/tabulate_output_iterator.h | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/thrust/thrust/iterator/detail/tabulate_output_iterator.inl b/thrust/thrust/iterator/detail/tabulate_output_iterator.inl index f473abf5636..b5ed7258015 100644 --- a/thrust/thrust/iterator/detail/tabulate_output_iterator.inl +++ b/thrust/thrust/iterator/detail/tabulate_output_iterator.inl @@ -48,18 +48,15 @@ private: DifferenceT index; }; -// Compute the iterator_adaptor instantiation to be used for tabulate_output_iterator +// Alias template for the iterator_adaptor instantiation to be used for tabulate_output_iterator template <typename BinaryFunction, typename System, typename DifferenceT> -struct tabulate_output_iterator_base -{ - using type = +using tabulate_output_iterator_base = thrust::iterator_adaptor<tabulate_output_iterator<BinaryFunction, System, DifferenceT>, counting_iterator<DifferenceT>, thrust::use_default, System, thrust::use_default, tabulate_output_iterator_proxy<BinaryFunction, DifferenceT>>; -}; // Register tabulate_output_iterator_proxy with 'is_proxy_reference' from // type_traits to enable its use with algorithms. diff --git a/thrust/thrust/iterator/tabulate_output_iterator.h b/thrust/thrust/iterator/tabulate_output_iterator.h index 2dcfc5efcd7..af9a244063e 100644 --- a/thrust/thrust/iterator/tabulate_output_iterator.h +++ b/thrust/thrust/iterator/tabulate_output_iterator.h @@ -60,13 +60,13 @@ THRUST_NAMESPACE_BEGIN */ template <typename BinaryFunction, typename System = use_default, typename DifferenceT = ptrdiff_t> -class tabulate_output_iterator : public detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>::type +class tabulate_output_iterator : public detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT> { /*! \cond */ public: - using super_t = typename detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>::type; + using super_t = detail::tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>; friend class thrust::iterator_core_access; /*! \endcond