From 93d61722e7c04f6504a7707830864affc03e1d0e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 4 Nov 2024 17:27:07 -0800 Subject: [PATCH] Refactor window storage (#627) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #621 Based on the offline discussions, this PR replaces the `window` logic with `bucket` and adds new overloads of `make_bucket_extent` so OA no longer relies on the `Container` type to determine the bucket extent. --------- Co-authored-by: Daniel Jünger --- README.md | 4 +- .../{aow_storage.cuh => bucket_storage.cuh} | 229 ++++++++-------- include/cuco/detail/equal_wrapper.cuh | 2 +- include/cuco/detail/extent/extent.inl | 141 +++++++--- .../cuco/detail/open_addressing/functors.cuh | 8 +- .../cuco/detail/open_addressing/kernels.cuh | 14 +- .../open_addressing/open_addressing_impl.cuh | 66 ++--- .../open_addressing_ref_impl.cuh | 252 +++++++++--------- include/cuco/detail/probe_sequence_impl.cuh | 2 +- include/cuco/detail/static_map.inl | 24 +- include/cuco/detail/static_map/helpers.cuh | 4 +- include/cuco/detail/static_map/kernels.cuh | 18 +- include/cuco/detail/static_map/static_map.inl | 4 +- .../cuco/detail/static_map/static_map_ref.inl | 82 +++--- .../static_multimap/device_view_impl.inl | 38 +-- .../static_multimap/static_multimap_ref.inl | 22 +- .../static_multiset/static_multiset_ref.inl | 20 +- include/cuco/detail/static_set/kernels.cuh | 4 +- include/cuco/detail/static_set/static_set.inl | 4 +- .../cuco/detail/static_set/static_set_ref.inl | 22 +- .../{aow_storage.inl => bucket_storage.inl} | 100 +++---- ...orage_base.cuh => bucket_storage_base.cuh} | 49 ++-- include/cuco/detail/storage/kernels.cuh | 18 +- include/cuco/detail/storage/storage.cuh | 12 +- include/cuco/extent.cuh | 124 +++++++-- include/cuco/static_map.cuh | 14 +- include/cuco/static_map_ref.cuh | 15 +- include/cuco/static_multimap.cuh | 12 +- include/cuco/static_multimap_ref.cuh | 15 +- include/cuco/static_multiset.cuh | 12 +- include/cuco/static_multiset_ref.cuh | 13 +- include/cuco/static_set.cuh | 12 +- include/cuco/static_set_ref.cuh | 15 +- include/cuco/storage.cuh | 24 +- tests/static_map/capacity_test.cu | 6 +- tests/static_set/capacity_test.cu | 6 +- tests/utility/storage_test.cu | 30 +-- 37 files changed, 807 insertions(+), 630 deletions(-) rename include/cuco/{aow_storage.cuh => bucket_storage.cuh} (50%) rename include/cuco/detail/storage/{aow_storage.inl => bucket_storage.inl} (53%) rename include/cuco/detail/storage/{aow_storage_base.cuh => bucket_storage_base.cuh} (53%) diff --git a/README.md b/README.md index bbc6fc18b..7703effa9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Similar to how [Thrust](https://github.com/thrust/thrust) and [CUB](https://gith ### Major Updates +__11/01/2024__ Refined the term `window` as `bucket` + __01/08/2024__ Deprecated the `experimental` namespace __01/02/2024__ Moved the legacy `static_map` to `cuco::legacy` namespace @@ -254,4 +256,4 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries. #### Examples: -- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) \ No newline at end of file +- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) diff --git a/include/cuco/aow_storage.cuh b/include/cuco/bucket_storage.cuh similarity index 50% rename from include/cuco/aow_storage.cuh rename to include/cuco/bucket_storage.cuh index abdbaaff7..d5005ea89 100644 --- a/include/cuco/aow_storage.cuh +++ b/include/cuco/bucket_storage.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include @@ -29,200 +29,213 @@ #include namespace cuco { +/// Bucket type alias +template +using bucket = detail::bucket; -/// Window type alias -template -using window = detail::window; - -/// forward declaration -template -class aow_storage_ref; +/// Alias for bucket +template +using window = bucket; /** - * @brief Array of Window open addressing storage class. + * @brief Non-owning array of buckets storage reference type. * - * @tparam T Slot type - * @tparam WindowSize Number of slots in each window - * @tparam Extent Type of extent denoting number of windows - * @tparam Allocator Type of allocator used for device storage (de)allocation + * @tparam T Storage element type + * @tparam BucketSize Number of slots in each bucket + * @tparam Extent Type of extent denoting storage capacity */ -template , - typename Allocator = cuco::cuda_allocator>> -class aow_storage : public detail::aow_storage_base { +template > +class bucket_storage_ref : public detail::bucket_storage_base { public: - using base_type = detail::aow_storage_base; ///< AoW base class type + /// Array of buckets base class type + using base_type = detail::bucket_storage_base; - using base_type::window_size; ///< Number of elements processed per window + using base_type::bucket_size; ///< Number of elements processed per bucket using extent_type = typename base_type::extent_type; ///< Storage extent type using size_type = typename base_type::size_type; ///< Storage size type using value_type = typename base_type::value_type; ///< Slot type - using window_type = typename base_type::window_type; ///< Slot window type + using bucket_type = typename base_type::bucket_type; ///< Slot bucket type using base_type::capacity; - using base_type::num_windows; - - /// Type of the allocator to (de)allocate windows - using allocator_type = - typename std::allocator_traits::template rebind_alloc; - using window_deleter_type = - detail::custom_deleter; ///< Type of window deleter - using ref_type = aow_storage_ref; ///< Storage ref type + using base_type::num_buckets; /** - * @brief Constructor of AoW storage. - * - * @note The input `size` should be exclusively determined by the return value of - * `make_window_extent` since it depends on the requested low-bound value, the probing scheme, and - * the storage. + * @brief Constructor of AoS storage ref. * - * @param size Number of windows to (de)allocate - * @param allocator Allocator used for (de)allocating device storage + * @param size Number of buckets + * @param buckets Pointer to the buckets array */ - explicit constexpr aow_storage(Extent size, Allocator const& allocator = {}); + __host__ __device__ explicit constexpr bucket_storage_ref(Extent size, + bucket_type* buckets) noexcept; - aow_storage(aow_storage&&) = default; ///< Move constructor /** - * @brief Replaces the contents of the storage with another storage. + * @brief Custom un-incrementable input iterator for the convenience of `find` operations. * - * @return Reference of the current storage object + * @note This iterator is for read only and NOT incrementable. */ - aow_storage& operator=(aow_storage&&) = default; - ~aow_storage() = default; ///< Destructor - - aow_storage(aow_storage const&) = delete; - aow_storage& operator=(aow_storage const&) = delete; + struct iterator; + using const_iterator = iterator const; ///< Const forward iterator type /** - * @brief Gets windows array. + * @brief Returns an iterator to one past the last slot. + * + * This is provided for convenience for those familiar with checking + * an iterator returned from `find()` against the `end()` iterator. * - * @return Pointer to the first window + * @return An iterator to one past the last slot */ - [[nodiscard]] constexpr window_type* data() const noexcept; + [[nodiscard]] __device__ constexpr iterator end() noexcept; /** - * @brief Gets the storage allocator. + * @brief Returns a const_iterator to one past the last slot. * - * @return The storage allocator + * This is provided for convenience for those familiar with checking + * an iterator returned from `find()` against the `end()` iterator. + * + * @return A const_iterator to one past the last slot */ - [[nodiscard]] constexpr allocator_type allocator() const noexcept; + [[nodiscard]] __device__ constexpr const_iterator end() const noexcept; /** - * @brief Gets window storage reference. + * @brief Gets buckets array. * - * @return Reference of window storage + * @return Pointer to the first bucket */ - [[nodiscard]] constexpr ref_type ref() const noexcept; + [[nodiscard]] __device__ constexpr bucket_type* data() noexcept; /** - * @brief Initializes each slot in the AoW storage to contain `key`. + * @brief Gets bucket array. * - * @param key Key to which all keys in `slots` are initialized - * @param stream Stream used for executing the kernel + * @return Pointer to the first bucket */ - void initialize(value_type key, cuda::stream_ref stream = {}); + [[nodiscard]] __device__ constexpr bucket_type* data() const noexcept; /** - * @brief Asynchronously initializes each slot in the AoW storage to contain `key`. + * @brief Returns an array of slots (or a bucket) for a given index. * - * @param key Key to which all keys in `slots` are initialized - * @param stream Stream used for executing the kernel + * @param index Index of the bucket + * @return An array of slots */ - void initialize_async(value_type key, cuda::stream_ref stream = {}) noexcept; + [[nodiscard]] __device__ constexpr bucket_type operator[](size_type index) const noexcept; private: - allocator_type allocator_; ///< Allocator used to (de)allocate windows - window_deleter_type window_deleter_; ///< Custom windows deleter - std::unique_ptr windows_; ///< Pointer to AoW storage + bucket_type* buckets_; ///< Pointer to the buckets array }; /** - * @brief Non-owning AoW storage reference type. + * @brief Array of buckets open addressing storage class. * - * @tparam T Storage element type - * @tparam WindowSize Number of slots in each window - * @tparam Extent Type of extent denoting storage capacity + * @tparam T Slot type + * @tparam BucketSize Number of slots in each bucket + * @tparam Extent Type of extent denoting number of buckets + * @tparam Allocator Type of allocator used for device storage (de)allocation */ -template > -class aow_storage_ref : public detail::aow_storage_base { +template , + typename Allocator = cuco::cuda_allocator>> +class bucket_storage : public detail::bucket_storage_base { public: - using base_type = detail::aow_storage_base; ///< AoW base class type + /// Array of buckets base class type + using base_type = detail::bucket_storage_base; - using base_type::window_size; ///< Number of elements processed per window + using base_type::bucket_size; ///< Number of elements processed per bucket using extent_type = typename base_type::extent_type; ///< Storage extent type using size_type = typename base_type::size_type; ///< Storage size type using value_type = typename base_type::value_type; ///< Slot type - using window_type = typename base_type::window_type; ///< Slot window type + using bucket_type = typename base_type::bucket_type; ///< Slot bucket type using base_type::capacity; - using base_type::num_windows; + using base_type::num_buckets; + + /// Type of the allocator to (de)allocate buckets + using allocator_type = + typename std::allocator_traits::template rebind_alloc; + using bucket_deleter_type = + detail::custom_deleter; ///< Type of bucket deleter + using ref_type = bucket_storage_ref; ///< Storage ref type /** - * @brief Constructor of AoS storage ref. + * @brief Constructor of bucket storage. + * + * @note The input `size` should be exclusively determined by the return value of + * `make_bucket_extent` since it depends on the requested low-bound value, the probing scheme, and + * the storage. * - * @param size Number of windows - * @param windows Pointer to the windows array + * @param size Number of buckets to (de)allocate + * @param allocator Allocator used for (de)allocating device storage */ - __host__ __device__ explicit constexpr aow_storage_ref(Extent size, - window_type* windows) noexcept; + explicit constexpr bucket_storage(Extent size, Allocator const& allocator = {}); + bucket_storage(bucket_storage&&) = default; ///< Move constructor /** - * @brief Custom un-incrementable input iterator for the convenience of `find` operations. + * @brief Replaces the contents of the storage with another storage. * - * @note This iterator is for read only and NOT incrementable. + * @return Reference of the current storage object */ - struct iterator; - using const_iterator = iterator const; ///< Const forward iterator type + bucket_storage& operator=(bucket_storage&&) = default; + ~bucket_storage() = default; ///< Destructor + + bucket_storage(bucket_storage const&) = delete; + bucket_storage& operator=(bucket_storage const&) = delete; /** - * @brief Returns an iterator to one past the last slot. + * @brief Gets buckets array. * - * This is provided for convenience for those familiar with checking - * an iterator returned from `find()` against the `end()` iterator. - * - * @return An iterator to one past the last slot + * @return Pointer to the first bucket */ - [[nodiscard]] __device__ constexpr iterator end() noexcept; + [[nodiscard]] constexpr bucket_type* data() const noexcept; /** - * @brief Returns a const_iterator to one past the last slot. - * - * This is provided for convenience for those familiar with checking - * an iterator returned from `find()` against the `end()` iterator. + * @brief Gets the storage allocator. * - * @return A const_iterator to one past the last slot + * @return The storage allocator */ - [[nodiscard]] __device__ constexpr const_iterator end() const noexcept; + [[nodiscard]] constexpr allocator_type allocator() const noexcept; /** - * @brief Gets windows array. + * @brief Gets bucket storage reference. * - * @return Pointer to the first window + * @return Reference of bucket storage */ - [[nodiscard]] __device__ constexpr window_type* data() noexcept; + [[nodiscard]] constexpr ref_type ref() const noexcept; /** - * @brief Gets windows array. + * @brief Initializes each slot in the bucket storage to contain `key`. * - * @return Pointer to the first window + * @param key Key to which all keys in `slots` are initialized + * @param stream Stream used for executing the kernel */ - [[nodiscard]] __device__ constexpr window_type* data() const noexcept; + void initialize(value_type key, cuda::stream_ref stream = {}); /** - * @brief Returns an array of slots (or a window) for a given index. + * @brief Asynchronously initializes each slot in the bucket storage to contain `key`. * - * @param index Index of the window - * @return An array of slots + * @param key Key to which all keys in `slots` are initialized + * @param stream Stream used for executing the kernel */ - [[nodiscard]] __device__ constexpr window_type operator[](size_type index) const noexcept; + void initialize_async(value_type key, cuda::stream_ref stream = {}) noexcept; private: - window_type* windows_; ///< Pointer to the windows array + allocator_type allocator_; ///< Allocator used to (de)allocate buckets + bucket_deleter_type bucket_deleter_; ///< Custom buckets deleter + /// Pointer to the bucket storage + std::unique_ptr buckets_; }; +/// Alias for bucket_storage_ref +template > +using aow_storage_ref = bucket_storage_ref; + +/// Alias for bucket_storage +template , + typename Allocator = cuco::cuda_allocator>> +using aow_storage = bucket_storage; + } // namespace cuco -#include +#include diff --git a/include/cuco/detail/equal_wrapper.cuh b/include/cuco/detail/equal_wrapper.cuh index a6f7e4d60..9dc6b030b 100644 --- a/include/cuco/detail/equal_wrapper.cuh +++ b/include/cuco/detail/equal_wrapper.cuh @@ -81,7 +81,7 @@ struct equal_wrapper { * * @note This function always compares the right-hand side element against sentinel values first * then performs a equality check with the given `equal_` callable, i.e., `equal_(lhs, rhs)`. - * @note Container (like set or map) buckets MUST be always on the right-hand side. + * @note Container (like set or map) slots MUST be always on the right-hand side. * * @tparam IsInsert Flag indicating whether it's an insert equality check or not. Insert probing * stops when it's an empty or erased slot while query probing stops only when it's empty. diff --git a/include/cuco/detail/extent/extent.inl b/include/cuco/detail/extent/extent.inl index 13f056a57..916d75e2c 100644 --- a/include/cuco/detail/extent/extent.inl +++ b/include/cuco/detail/extent/extent.inl @@ -20,35 +20,40 @@ #include // TODO move to detail/extent/ #include #include +#include +#include #include -#include +#include namespace cuco { template -struct window_extent { +struct bucket_extent { using value_type = SizeType; ///< Extent value type __host__ __device__ constexpr value_type value() const noexcept { return N; } __host__ __device__ explicit constexpr operator value_type() const noexcept { return value(); } private: - __host__ __device__ explicit constexpr window_extent() noexcept {} - __host__ __device__ explicit constexpr window_extent(SizeType) noexcept {} + __host__ __device__ explicit constexpr bucket_extent() noexcept {} + __host__ __device__ explicit constexpr bucket_extent(SizeType) noexcept {} - template - friend auto constexpr make_window_extent(extent ext); + template + friend auto constexpr make_bucket_extent(extent ext); + + template + friend auto constexpr make_bucket_extent(extent ext); template - friend __host__ __device__ constexpr value_type operator-(window_extent const& lhs, + friend __host__ __device__ constexpr value_type operator-(bucket_extent const& lhs, Rhs rhs) noexcept { return lhs.value() - rhs; } template - friend __host__ __device__ constexpr value_type operator/(window_extent const& lhs, + friend __host__ __device__ constexpr value_type operator/(bucket_extent const& lhs, Rhs rhs) noexcept { return lhs.value() / rhs; @@ -56,7 +61,7 @@ struct window_extent { template friend __host__ __device__ constexpr value_type operator%(Lhs lhs, - window_extent const& rhs) noexcept + bucket_extent const& rhs) noexcept { return lhs % rhs.value(); ; @@ -64,31 +69,22 @@ struct window_extent { }; template -struct window_extent : cuco::utility::fast_int { +struct bucket_extent : cuco::utility::fast_int { using value_type = typename cuco::utility::fast_int::fast_int::value_type; ///< Extent value type private: using cuco::utility::fast_int::fast_int; - template - friend auto constexpr make_window_extent(extent ext); -}; - -template -[[nodiscard]] auto constexpr make_window_extent(extent ext) -{ - return make_window_extent(ext); -} + template + friend auto constexpr make_bucket_extent(extent ext); -template -[[nodiscard]] auto constexpr make_window_extent(SizeType size) -{ - return make_window_extent(extent{size}); -} + template + friend auto constexpr make_bucket_extent(extent ext); +}; -template -[[nodiscard]] auto constexpr make_window_extent(extent ext) +template +[[nodiscard]] auto constexpr make_bucket_extent(extent ext) { auto constexpr max_prime = cuco::detail::primes.back(); auto constexpr max_value = @@ -96,17 +92,17 @@ template ? std::numeric_limits::max() : static_cast(max_prime); auto const size = cuco::detail::int_div_ceil( - std::max(static_cast(ext), static_cast(1)), CGSize * WindowSize); + std::max(static_cast(ext), static_cast(1)), CGSize * BucketSize); if (size > max_value) { CUCO_FAIL("Invalid input extent"); } if constexpr (N == dynamic_extent) { - return window_extent{static_cast( + return bucket_extent{static_cast( *cuco::detail::lower_bound( cuco::detail::primes.begin(), cuco::detail::primes.end(), static_cast(size)) * CGSize)}; } if constexpr (N != dynamic_extent) { - return window_extent( *cuco::detail::lower_bound(cuco::detail::primes.begin(), cuco::detail::primes.end(), @@ -115,22 +111,99 @@ template } } -template +template +[[nodiscard]] auto constexpr make_window_extent(extent ext) +{ + return make_bucket_extent(ext); +} + +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType size) +{ + return make_bucket_extent(extent{size}); +} + +template +[[nodiscard]] auto constexpr make_window_extent(SizeType size) +{ + return make_bucket_extent(extent{size}); +} + +template +[[nodiscard]] auto constexpr make_bucket_extent(extent ext) +{ + return make_bucket_extent(ext); + /* +// TODO fix linear probing with exact capacity +if constexpr (cuco::is_double_hashing::value) { + return make_bucket_extent(ext); +} else { + auto const size = cuco::detail::int_div_ceil( + cuda::std::max(static_cast(ext), static_cast(1)), + ProbingScheme::cg_size * Storage::bucket_size) + + cuda::std::min(static_cast(ext), static_cast(1)); + if constexpr (N == dynamic_extent) { + return bucket_extent{size * ProbingScheme::cg_size}; + } else { + return bucket_extent{}; + } +} +*/ +} + +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType size) +{ + return make_bucket_extent( + cuco::extent{size}); +} + +template +[[nodiscard]] auto constexpr make_bucket_extent(extent ext) +{ + return make_bucket_extent(ext); +} + +template +[[nodiscard]] auto constexpr make_window_extent(extent ext) +{ + return make_bucket_extent(ext); +} + +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType size) +{ + return make_bucket_extent(extent{size}); +} + +template [[nodiscard]] auto constexpr make_window_extent(SizeType size) { - return make_window_extent(extent{size}); + return make_bucket_extent(extent{size}); } namespace detail { template -struct is_window_extent : std::false_type {}; +struct is_bucket_extent : cuda::std::false_type {}; template -struct is_window_extent> : std::true_type {}; +struct is_bucket_extent> : cuda::std::true_type {}; template -inline constexpr bool is_window_extent_v = is_window_extent::value; +inline constexpr bool is_bucket_extent_v = is_bucket_extent::value; } // namespace detail } // namespace cuco diff --git a/include/cuco/detail/open_addressing/functors.cuh b/include/cuco/detail/open_addressing/functors.cuh index 7aacfa042..b94f80226 100644 --- a/include/cuco/detail/open_addressing/functors.cuh +++ b/include/cuco/detail/open_addressing/functors.cuh @@ -45,13 +45,13 @@ struct get_slot { */ __device__ constexpr auto operator()(typename StorageRef::size_type idx) const noexcept { - auto const window_idx = idx / StorageRef::window_size; - auto const intra_idx = idx % StorageRef::window_size; + auto const bucket_idx = idx / StorageRef::bucket_size; + auto const intra_idx = idx % StorageRef::bucket_size; if constexpr (HasPayload) { - auto const [first, second] = storage_[window_idx][intra_idx]; + auto const [first, second] = storage_[bucket_idx][intra_idx]; return thrust::make_tuple(first, second); } else { - return storage_[window_idx][intra_idx]; + return storage_[bucket_idx][intra_idx]; } } }; diff --git a/include/cuco/detail/open_addressing/kernels.cuh b/include/cuco/detail/open_addressing/kernels.cuh index 4726683a3..b0457c071 100644 --- a/include/cuco/detail/open_addressing/kernels.cuh +++ b/include/cuco/detail/open_addressing/kernels.cuh @@ -627,7 +627,7 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void count(InputIt first, } /** - * @brief Calculates the number of filled slots for the given window storage. + * @brief Calculates the number of filled slots for the given bucket storage. * * @tparam BlockSize Number of threads in each block * @tparam StorageRef Type of non-owning ref allowing access to storage @@ -649,12 +649,12 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void size(StorageRef storage, auto idx = cuco::detail::global_thread_id(); size_type thread_count = 0; - auto const n = storage.num_windows(); + auto const n = storage.num_buckets(); while (idx < n) { - auto const window = storage[idx]; + auto const bucket = storage[idx]; #pragma unroll - for (auto const& it : window) { + for (auto const& it : bucket) { thread_count += static_cast(is_filled(it)); } idx += loop_stride; @@ -686,7 +686,7 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void rehash( auto const tile_rank = tile.meta_group_rank(); auto const loop_stride = cuco::detail::grid_stride(); auto idx = cuco::detail::global_thread_id(); - auto const n = storage_ref.num_windows(); + auto const n = storage_ref.num_buckets(); while (idx - thread_rank < n) { if (thread_rank == 0) { buffer_size = 0; } @@ -694,9 +694,9 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void rehash( // gather values in shmem buffer if (idx < n) { - auto const window = storage_ref[idx]; + auto const bucket = storage_ref[idx]; - for (auto const& slot : window) { + for (auto const& slot : bucket) { if (is_filled(slot)) { buffer[atomicAdd_block(&buffer_size, 1)] = slot; } } } diff --git a/include/cuco/detail/open_addressing/open_addressing_impl.cuh b/include/cuco/detail/open_addressing/open_addressing_impl.cuh index 3d7abc9a5..a6fd9b3c1 100644 --- a/include/cuco/detail/open_addressing/open_addressing_impl.cuh +++ b/include/cuco/detail/open_addressing/open_addressing_impl.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,7 @@ namespace detail { * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage - * @tparam Storage Slot window storage type + * @tparam Storage Slot bucket storage type */ template (std::declval())); - using size_type = typename extent_type::value_type; ///< Size type - using key_equal = KeyEqual; ///< Key equality comparator type + using extent_type = + decltype(make_bucket_extent(std::declval())); + using size_type = typename extent_type::value_type; ///< Size type + using key_equal = KeyEqual; ///< Key equality comparator type using storage_type = detail::storage; ///< Storage type using allocator_type = typename storage_type::allocator_type; ///< Allocator type - using storage_ref_type = typename storage_type::ref_type; ///< Non-owning window storage ref type - using probing_scheme_type = ProbingScheme; ///< Probe scheme type - using hasher = typename probing_scheme_type::hasher; ///< Hash function type + using storage_ref_type = typename storage_type::ref_type; ///< Non-owning bucket storage ref type /** * @brief Constructs a statically-sized open addressing data structure with the specified initial * capacity, sentinel values and CUDA stream. * * @note The actual capacity depends on the given `capacity`, the probing scheme, CG size, and the - * window size and it is computed via the `make_window_extent` factory. Insert operations will not + * bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will not * automatically grow the container. Attempting to insert more unique keys than the capacity of * the container results in undefined behavior. * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert @@ -132,7 +134,7 @@ class open_addressing_impl { erased_key_sentinel_{this->extract_key(empty_slot_sentinel)}, predicate_{pred}, probing_scheme_{probing_scheme}, - storage_{make_window_extent(capacity), alloc} + storage_{make_bucket_extent(capacity), alloc} { this->clear_async(stream); } @@ -145,7 +147,7 @@ class open_addressing_impl { * insert and the desired load factor without manually computing the desired capacity. The actual * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG - * size, and the window size and is computed via the `make_window_extent` factory. + * size, and the bucket size and is computed via the `make_bucket_extent` factory. * @note Insert operations will not automatically grow the container. * @note Attempting to insert more unique keys than the capacity of the container results in * undefined behavior. @@ -178,7 +180,7 @@ class open_addressing_impl { erased_key_sentinel_{this->extract_key(empty_slot_sentinel)}, predicate_{pred}, probing_scheme_{probing_scheme}, - storage_{make_window_extent( + storage_{make_bucket_extent( static_cast(std::ceil(static_cast(n) / desired_load_factor))), alloc} { @@ -193,7 +195,7 @@ class open_addressing_impl { * capacity, sentinel values and CUDA stream. * * @note The actual capacity depends on the given `capacity`, the probing scheme, CG size, and the - * window size and it is computed via the `make_window_extent` factory. Insert operations will not + * bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will not * automatically grow the container. Attempting to insert more unique keys than the capacity of * the container results in undefined behavior. * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert @@ -220,7 +222,7 @@ class open_addressing_impl { erased_key_sentinel_{erased_key_sentinel}, predicate_{pred}, probing_scheme_{probing_scheme}, - storage_{make_window_extent(capacity), alloc} + storage_{make_bucket_extent(capacity), alloc} { CUCO_EXPECTS(this->empty_key_sentinel() != this->erased_key_sentinel(), "The empty key sentinel and erased key sentinel cannot be the same value.", @@ -802,14 +804,14 @@ class open_addressing_impl { this->empty_key_sentinel(), this->erased_key_sentinel()}; auto storage_ref = this->storage_ref(); - auto const op = [callback_op, is_filled, storage_ref] __device__(auto const window_slots) { - for (auto const slot : window_slots) { + auto const op = [callback_op, is_filled, storage_ref] __device__(auto const bucket_slots) { + for (auto const slot : bucket_slots) { if (is_filled(slot)) { callback_op(slot); } } }; CUCO_CUDA_TRY(cub::DeviceFor::ForEachCopyN( - storage_ref.data(), storage_ref.num_windows(), op, stream.get())); + storage_ref.data(), storage_ref.num_buckets(), op, stream.get())); } /** @@ -860,7 +862,7 @@ class open_addressing_impl { detail::counter_storage{this->allocator()}; counter.reset(stream); - auto const grid_size = cuco::detail::grid_size(storage_.num_windows()); + auto const grid_size = cuco::detail::grid_size(storage_.num_buckets()); auto const is_filled = open_addressing_ns::detail::slot_is_filled{ this->empty_key_sentinel(), this->erased_key_sentinel()}; @@ -881,7 +883,7 @@ class open_addressing_impl { * * @tparam Container The container type this function operates on * - * @param extent The container's new `window_extent` after this operation took place + * @param extent The container's new `bucket_extent` after this operation took place * @param container The container to be rehashed * @param stream CUDA stream used for this operation */ @@ -896,7 +898,7 @@ class open_addressing_impl { * @brief Asynchronously reserves at least the specified number of slots and regenerates the * container * - * @note Changes the number of windows to a value that is not less than `extent`, then + * @note Changes the number of buckets to a value that is not less than `extent`, then * rehashes the container, i.e. puts the elements into appropriate slots considering * that the total number of slots has changed. * @@ -910,7 +912,7 @@ class open_addressing_impl { * * @tparam Container The container type this function operates on * - * @param extent The container's new `window_extent` after this operation took place + * @param extent The container's new `bucket_extent` after this operation took place * @param container The container to be rehashed * @param stream CUDA stream used for this operation */ @@ -926,21 +928,21 @@ class open_addressing_impl { * * @tparam Container The container type this function operates on * - * @param extent The container's new `window_extent` after this operation took place + * @param extent The container's new `bucket_extent` after this operation took place * @param container The container to be rehashed * @param stream CUDA stream used for this operation */ template void rehash_async(Container const& container, cuda::stream_ref stream) { - this->rehash_async(this->storage_.window_extent(), container, stream); + this->rehash_async(this->storage_.bucket_extent(), container, stream); } /** * @brief Asynchronously reserves at least the specified number of slots and regenerates the * container * - * @note Changes the number of windows to a value that is not less than `extent`, then + * @note Changes the number of buckets to a value that is not less than `extent`, then * rehashes the container, i.e. puts the elements into appropriate slots considering * that the total number of slots has changed. * @@ -951,7 +953,7 @@ class open_addressing_impl { * * @tparam Container The container type this function operates on * - * @param extent The container's new `window_extent` after this operation took place + * @param extent The container's new `bucket_extent` after this operation took place * @param container The container to be rehashed * @param stream CUDA stream used for this operation */ @@ -962,12 +964,12 @@ class open_addressing_impl { new (&storage_) storage_type{extent, this->allocator()}; this->clear_async(stream); - auto const num_windows = old_storage.num_windows(); - if (num_windows == 0) { return; } + auto const num_buckets = old_storage.num_buckets(); + if (num_buckets == 0) { return; } auto constexpr block_size = cuco::detail::default_block_size(); auto constexpr stride = cuco::detail::default_stride(); - auto const grid_size = cuco::detail::grid_size(num_windows, 1, stride, block_size); + auto const grid_size = cuco::detail::grid_size(num_buckets, 1, stride, block_size); auto const is_filled = open_addressing_ns::detail::slot_is_filled{ this->empty_key_sentinel(), this->erased_key_sentinel()}; @@ -1164,8 +1166,8 @@ class open_addressing_impl { key_type erased_key_sentinel_; ///< Key value that represents an erased slot key_equal predicate_; ///< Key equality binary predicate probing_scheme_type probing_scheme_; ///< Probing scheme - storage_type storage_; ///< Slot window storage + storage_type storage_; ///< Slot bucket storage }; } // namespace detail -} // namespace cuco \ No newline at end of file +} // namespace cuco diff --git a/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh b/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh index 56a58a9e4..a8edb156e 100644 --- a/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh +++ b/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh @@ -43,21 +43,21 @@ namespace detail { enum class insert_result : int32_t { CONTINUE = 0, SUCCESS = 1, DUPLICATE = 2 }; /** - * @brief Helper struct to store intermediate window probing results. + * @brief Helper struct to store intermediate bucket probing results. */ -struct window_probing_results { +struct bucket_probing_results { detail::equal_result state_; ///< Equal result - int32_t intra_window_index_; ///< Intra-window index + int32_t intra_bucket_index_; ///< Intra-bucket index /** - * @brief Constructs window_probing_results. + * @brief Constructs bucket_probing_results. * * @param state The three way equality result - * @param index Intra-window index + * @param index Intra-bucket index */ - __device__ explicit constexpr window_probing_results(detail::equal_result state, + __device__ explicit constexpr bucket_probing_results(detail::equal_result state, int32_t index) noexcept - : state_{state}, intra_window_index_{index} + : state_{state}, intra_bucket_index_{index} { } }; @@ -104,15 +104,15 @@ class open_addressing_ref_impl { static constexpr auto allows_duplicates = AllowsDuplicates; // TODO: how to re-enable this check? - // static_assert(is_window_extent_v, - // "Extent is not a valid cuco::window_extent"); + // static_assert(is_bucket_extent_v, + // "Extent is not a valid cuco::bucket_extent"); public: using key_type = Key; ///< Key type using probing_scheme_type = ProbingScheme; ///< Type of probing scheme using hasher = typename probing_scheme_type::hasher; ///< Hash function type using storage_ref_type = StorageRef; ///< Type of storage ref - using window_type = typename storage_ref_type::window_type; ///< Window type + using bucket_type = typename storage_ref_type::bucket_type; ///< Bucket type using value_type = typename storage_ref_type::value_type; ///< Storage element type using extent_type = typename storage_ref_type::extent_type; ///< Extent type using size_type = typename storage_ref_type::size_type; ///< Probing scheme size type @@ -121,8 +121,8 @@ class open_addressing_ref_impl { using const_iterator = typename storage_ref_type::const_iterator; ///< Const slot iterator type static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size - static constexpr auto window_size = - storage_ref_type::window_size; ///< Number of elements handled per window + static constexpr auto bucket_size = + storage_ref_type::bucket_size; ///< Number of elements handled per bucket static constexpr auto thread_scope = Scope; ///< CUDA thread scope /** @@ -272,13 +272,13 @@ class open_addressing_ref_impl { } /** - * @brief Gets the window extent of the current storage. + * @brief Gets the bucket extent of the current storage. * - * @return The window extent. + * @return The bucket extent. */ - [[nodiscard]] __host__ __device__ constexpr extent_type window_extent() const noexcept + [[nodiscard]] __host__ __device__ constexpr extent_type bucket_extent() const noexcept { - return storage_ref_.window_extent(); + return storage_ref_.bucket_extent(); } /** @@ -311,9 +311,9 @@ class open_addressing_ref_impl { * the ownership of the memory */ template - __device__ void make_copy(CG const& g, window_type* const memory_to_use) const noexcept + __device__ void make_copy(CG const& g, bucket_type* const memory_to_use) const noexcept { - auto const num_windows = static_cast(this->window_extent()); + auto const num_buckets = static_cast(this->bucket_extent()); #if defined(CUCO_HAS_CUDA_BARRIER) #pragma nv_diagnostic push // Disables `barrier` initialization warning. @@ -324,13 +324,13 @@ class open_addressing_ref_impl { g.sync(); cuda::memcpy_async( - g, memory_to_use, this->storage_ref().data(), sizeof(window_type) * num_windows, barrier); + g, memory_to_use, this->storage_ref().data(), sizeof(bucket_type) * num_buckets, barrier); barrier.arrive_and_wait(); #else - window_type const* const windows_ptr = this->storage_ref().data(); - for (size_type i = g.thread_rank(); i < num_windows; i += g.size()) { - memory_to_use[i] = windows_ptr[i]; + bucket_type const* const buckets_ptr = this->storage_ref().data(); + for (size_type i = g.thread_rank(); i < num_buckets; i += g.size()) { + memory_to_use[i] = buckets_ptr[i]; } g.sync(); #endif @@ -349,11 +349,11 @@ class open_addressing_ref_impl { __device__ constexpr void initialize(CG const& tile) noexcept { auto tid = tile.thread_rank(); - auto* const windows_ptr = this->storage_ref().data(); - while (tid < static_cast(this->window_extent())) { - auto& window = *(windows_ptr + tid); + auto* const buckets_ptr = this->storage_ref().data(); + while (tid < static_cast(this->bucket_extent())) { + auto& bucket = *(buckets_ptr + tid); #pragma unroll - for (auto& slot : window) { + for (auto& slot : bucket) { slot = this->empty_slot_sentinel(); } tid += tile.size(); @@ -377,12 +377,12 @@ class open_addressing_ref_impl { auto const val = this->heterogeneous_value(value); auto const key = this->extract_key(val); - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto& slot_content : window_slots) { + for (auto& slot_content : bucket_slots) { auto const eq_res = this->predicate_.operator()(key, this->extract_key(slot_content)); @@ -391,8 +391,8 @@ class open_addressing_ref_impl { if (eq_res == detail::equal_result::EQUAL) { return false; } } if (eq_res == detail::equal_result::AVAILABLE) { - auto const intra_window_index = thrust::distance(window_slots.begin(), &slot_content); - switch (attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_window_index, + auto const intra_bucket_index = thrust::distance(bucket_slots.begin(), &slot_content); + switch (attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index, slot_content, val)) { case insert_result::DUPLICATE: { @@ -427,29 +427,29 @@ class open_addressing_ref_impl { { auto const val = this->heterogeneous_value(value); auto const key = this->extract_key(val); - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - auto const [state, intra_window_index] = [&]() { - for (auto i = 0; i < window_size; ++i) { + auto const [state, intra_bucket_index] = [&]() { + for (auto i = 0; i < bucket_size; ++i) { switch ( - this->predicate_.operator()(key, this->extract_key(window_slots[i]))) { + this->predicate_.operator()(key, this->extract_key(bucket_slots[i]))) { case detail::equal_result::AVAILABLE: - return window_probing_results{detail::equal_result::AVAILABLE, i}; + return bucket_probing_results{detail::equal_result::AVAILABLE, i}; case detail::equal_result::EQUAL: { if constexpr (allows_duplicates) { continue; } else { - return window_probing_results{detail::equal_result::EQUAL, i}; + return bucket_probing_results{detail::equal_result::EQUAL, i}; } } default: continue; } } // returns dummy index `-1` for UNEQUAL - return window_probing_results{detail::equal_result::UNEQUAL, -1}; + return bucket_probing_results{detail::equal_result::UNEQUAL, -1}; }(); if constexpr (not allows_duplicates) { @@ -462,8 +462,8 @@ class open_addressing_ref_impl { auto const src_lane = __ffs(group_contains_available) - 1; auto const status = (group.thread_rank() == src_lane) - ? attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_window_index, - window_slots[intra_window_index], + ? attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index, + bucket_slots[intra_bucket_index], val) : insert_result::CONTINUE; @@ -512,39 +512,39 @@ class open_addressing_ref_impl { auto const val = this->heterogeneous_value(value); auto const key = this->extract_key(val); - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto i = 0; i < window_size; ++i) { + for (auto i = 0; i < bucket_size; ++i) { auto const eq_res = - this->predicate_.operator()(key, this->extract_key(window_slots[i])); - auto* window_ptr = (storage_ref_.data() + *probing_iter)->data(); + this->predicate_.operator()(key, this->extract_key(bucket_slots[i])); + auto* bucket_ptr = (storage_ref_.data() + *probing_iter)->data(); // If the key is already in the container, return false if (eq_res == detail::equal_result::EQUAL) { if constexpr (has_payload) { // wait to ensure that the write to the value part also took place - this->wait_for_payload((window_ptr + i)->second, this->empty_value_sentinel()); + this->wait_for_payload((bucket_ptr + i)->second, this->empty_value_sentinel()); } - return {iterator{&window_ptr[i]}, false}; + return {iterator{&bucket_ptr[i]}, false}; } if (eq_res == detail::equal_result::AVAILABLE) { - switch (this->attempt_insert_stable(window_ptr + i, window_slots[i], val)) { + switch (this->attempt_insert_stable(bucket_ptr + i, bucket_slots[i], val)) { case insert_result::SUCCESS: { if constexpr (has_payload) { // wait to ensure that the write to the value part also took place - this->wait_for_payload((window_ptr + i)->second, this->empty_value_sentinel()); + this->wait_for_payload((bucket_ptr + i)->second, this->empty_value_sentinel()); } - return {iterator{&window_ptr[i]}, true}; + return {iterator{&bucket_ptr[i]}, true}; } case insert_result::DUPLICATE: { if constexpr (has_payload) { // wait to ensure that the write to the value part also took place - this->wait_for_payload((window_ptr + i)->second, this->empty_value_sentinel()); + this->wait_for_payload((bucket_ptr + i)->second, this->empty_value_sentinel()); } - return {iterator{&window_ptr[i]}, false}; + return {iterator{&bucket_ptr[i]}, false}; } default: continue; } @@ -583,23 +583,23 @@ class open_addressing_ref_impl { auto const val = this->heterogeneous_value(value); auto const key = this->extract_key(val); - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - auto const [state, intra_window_index] = [&]() { + auto const [state, intra_bucket_index] = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto i = 0; i < window_size; ++i) { + for (auto i = 0; i < bucket_size; ++i) { res = - this->predicate_.operator()(key, this->extract_key(window_slots[i])); - if (res != detail::equal_result::UNEQUAL) { return window_probing_results{res, i}; } + this->predicate_.operator()(key, this->extract_key(bucket_slots[i])); + if (res != detail::equal_result::UNEQUAL) { return bucket_probing_results{res, i}; } } // returns dummy index `-1` for UNEQUAL - return window_probing_results{res, -1}; + return bucket_probing_results{res, -1}; }(); - auto* slot_ptr = (storage_ref_.data() + *probing_iter)->data() + intra_window_index; + auto* slot_ptr = (storage_ref_.data() + *probing_iter)->data() + intra_bucket_index; // If the key is already in the container, return false auto const group_finds_equal = group.ballot(state == detail::equal_result::EQUAL); @@ -620,9 +620,9 @@ class open_addressing_ref_impl { if (group_contains_available) { auto const src_lane = __ffs(group_contains_available) - 1; auto const res = group.shfl(reinterpret_cast(slot_ptr), src_lane); - auto const status = [&, target_idx = intra_window_index]() { + auto const status = [&, target_idx = intra_bucket_index]() { if (group.thread_rank() != src_lane) { return insert_result::CONTINUE; } - return this->attempt_insert_stable(slot_ptr, window_slots[target_idx], val); + return this->attempt_insert_stable(slot_ptr, bucket_slots[target_idx], val); }(); switch (group.shfl(status, src_lane)) { @@ -668,12 +668,12 @@ class open_addressing_ref_impl { { static_assert(cg_size == 1, "Non-CG operation is incompatible with the current probing scheme"); - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto& slot_content : window_slots) { + for (auto& slot_content : bucket_slots) { auto const eq_res = this->predicate_.operator()(key, this->extract_key(slot_content)); @@ -681,9 +681,9 @@ class open_addressing_ref_impl { if (eq_res == detail::equal_result::EMPTY) { return false; } // Key exists, return true if successfully deleted if (eq_res == detail::equal_result::EQUAL) { - auto const intra_window_index = thrust::distance(window_slots.begin(), &slot_content); + auto const intra_bucket_index = thrust::distance(bucket_slots.begin(), &slot_content); switch (attempt_insert_stable( - (storage_ref_.data() + *probing_iter)->data() + intra_window_index, + (storage_ref_.data() + *probing_iter)->data() + intra_bucket_index, slot_content, this->erased_slot_sentinel())) { case insert_result::SUCCESS: return true; @@ -710,19 +710,19 @@ class open_addressing_ref_impl { __device__ bool erase(cooperative_groups::thread_block_tile const& group, ProbeKey const& key) noexcept { - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - auto const [state, intra_window_index] = [&]() { + auto const [state, intra_bucket_index] = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto i = 0; i < window_size; ++i) { - res = this->predicate_.operator()(key, this->extract_key(window_slots[i])); - if (res != detail::equal_result::UNEQUAL) { return window_probing_results{res, i}; } + for (auto i = 0; i < bucket_size; ++i) { + res = this->predicate_.operator()(key, this->extract_key(bucket_slots[i])); + if (res != detail::equal_result::UNEQUAL) { return bucket_probing_results{res, i}; } } // returns dummy index `-1` for UNEQUAL - return window_probing_results{res, -1}; + return bucket_probing_results{res, -1}; }(); auto const group_contains_equal = group.ballot(state == detail::equal_result::EQUAL); @@ -731,8 +731,8 @@ class open_addressing_ref_impl { auto const status = (group.thread_rank() == src_lane) ? attempt_insert_stable( - (storage_ref_.data() + *probing_iter)->data() + intra_window_index, - window_slots[intra_window_index], + (storage_ref_.data() + *probing_iter)->data() + intra_bucket_index, + bucket_slots[intra_bucket_index], this->erased_slot_sentinel()) : insert_result::CONTINUE; @@ -766,13 +766,13 @@ class open_addressing_ref_impl { [[nodiscard]] __device__ bool contains(ProbeKey const& key) const noexcept { static_assert(cg_size == 1, "Non-CG operation is incompatible with the current probing scheme"); - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto& slot_content : window_slots) { + for (auto& slot_content : bucket_slots) { switch (this->predicate_.operator()(key, this->extract_key(slot_content))) { case detail::equal_result::UNEQUAL: continue; case detail::equal_result::EMPTY: return false; @@ -800,14 +800,14 @@ class open_addressing_ref_impl { [[nodiscard]] __device__ bool contains( cooperative_groups::thread_block_tile const& group, ProbeKey const& key) const noexcept { - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; auto const state = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto& slot : window_slots) { + for (auto& slot : bucket_slots) { res = this->predicate_.operator()(key, this->extract_key(slot)); if (res != detail::equal_result::UNEQUAL) { return res; } } @@ -836,14 +836,14 @@ class open_addressing_ref_impl { if constexpr (not allows_duplicates) { return static_cast(this->contains(key)); } else { - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); size_type count = 0; while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto& slot_content : window_slots) { + for (auto& slot_content : bucket_slots) { switch ( this->predicate_.operator()(key, this->extract_key(slot_content))) { case detail::equal_result::EMPTY: return count; @@ -870,15 +870,15 @@ class open_addressing_ref_impl { [[nodiscard]] __device__ size_type count( cooperative_groups::thread_block_tile const& group, ProbeKey const& key) const noexcept { - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); size_type count = 0; while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; auto const state = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto& slot : window_slots) { + for (auto& slot : bucket_slots) { res = this->predicate_.operator()(key, this->extract_key(slot)); if (res == detail::equal_result::EMPTY) { return res; } count += static_cast(res); @@ -907,15 +907,15 @@ class open_addressing_ref_impl { [[nodiscard]] __device__ const_iterator find(ProbeKey const& key) const noexcept { static_assert(cg_size == 1, "Non-CG operation is incompatible with the current probing scheme"); - auto probing_iter = probing_scheme_(key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(key, storage_ref_.bucket_extent()); while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - for (auto i = 0; i < window_size; ++i) { + for (auto i = 0; i < bucket_size; ++i) { switch ( - this->predicate_.operator()(key, this->extract_key(window_slots[i]))) { + this->predicate_.operator()(key, this->extract_key(bucket_slots[i]))) { case detail::equal_result::EMPTY: { return this->end(); } @@ -946,19 +946,19 @@ class open_addressing_ref_impl { [[nodiscard]] __device__ const_iterator find( cooperative_groups::thread_block_tile const& group, ProbeKey const& key) const noexcept { - auto probing_iter = probing_scheme_(group, key, storage_ref_.window_extent()); + auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent()); while (true) { - auto const window_slots = storage_ref_[*probing_iter]; + auto const bucket_slots = storage_ref_[*probing_iter]; - auto const [state, intra_window_index] = [&]() { + auto const [state, intra_bucket_index] = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto i = 0; i < window_size; ++i) { - res = this->predicate_.operator()(key, this->extract_key(window_slots[i])); - if (res != detail::equal_result::UNEQUAL) { return window_probing_results{res, i}; } + for (auto i = 0; i < bucket_size; ++i) { + res = this->predicate_.operator()(key, this->extract_key(bucket_slots[i])); + if (res != detail::equal_result::UNEQUAL) { return bucket_probing_results{res, i}; } } // returns dummy index `-1` for UNEQUAL - return window_probing_results{res, -1}; + return bucket_probing_results{res, -1}; }(); // Find a match for the probe key, thus return an iterator to the entry @@ -966,7 +966,7 @@ class open_addressing_ref_impl { if (group_finds_match) { auto const src_lane = __ffs(group_finds_match) - 1; auto const res = group.shfl( - reinterpret_cast(&(*(storage_ref_.data() + *probing_iter))[intra_window_index]), + reinterpret_cast(&(*(storage_ref_.data() + *probing_iter))[intra_bucket_index]), src_lane); return const_iterator{reinterpret_cast(res)}; } @@ -1133,7 +1133,7 @@ class open_addressing_ref_impl { static_assert(flushing_tile_size >= probing_tile_size); auto constexpr num_flushing_tiles = BlockSize / flushing_tile_size; - auto constexpr max_matches_per_step = flushing_tile_size * window_size; + auto constexpr max_matches_per_step = flushing_tile_size * bucket_size; auto constexpr buffer_size = buffer_multiplier * max_matches_per_step; auto const flushing_tile = cg::tiled_partition(block); @@ -1180,20 +1180,20 @@ class open_addressing_ref_impl { // make sure the flushing_tile is converged at this point to get a coalesced load auto const& probe = *(input_probe + idx); auto probing_iter = - this->probing_scheme_(probing_tile, probe, this->storage_ref_.window_extent()); + this->probing_scheme_(probing_tile, probe, this->storage_ref_.bucket_extent()); bool empty_found = false; bool match_found = false; [[maybe_unused]] bool found_any_match = false; // only needed if `IsOuter == true` while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = this->storage_ref_[*probing_iter]; + auto const bucket_slots = this->storage_ref_[*probing_iter]; - for (int32_t i = 0; i < window_size; ++i) { + for (int32_t i = 0; i < bucket_size; ++i) { if (not empty_found) { // inspect slot content switch (this->predicate_.operator()( - probe, this->extract_key(window_slots[i]))) { + probe, this->extract_key(bucket_slots[i]))) { case detail::equal_result::EMPTY: { empty_found = true; break; @@ -1214,7 +1214,7 @@ class open_addressing_ref_impl { if (match_found) { probe_buffers[flushing_tile_id][num_matches + matching_tile.thread_rank()] = probe; match_buffers[flushing_tile_id][num_matches + matching_tile.thread_rank()] = - window_slots[i]; + bucket_slots[i]; } // add number of new matches to the buffer counter @@ -1266,7 +1266,7 @@ class open_addressing_ref_impl { // the entire flushing tile has finished its work if (finished) { break; } - // onto the next probing window + // onto the next probing bucket ++probing_iter; } @@ -1297,20 +1297,20 @@ class open_addressing_ref_impl { __device__ void for_each(ProbeKey const& key, CallbackOp&& callback_op) const noexcept { static_assert(cg_size == 1, "Non-CG operation is incompatible with the current probing scheme"); - auto probing_iter = this->probing_scheme_(key, this->storage_ref_.window_extent()); + auto probing_iter = this->probing_scheme_(key, this->storage_ref_.bucket_extent()); while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = this->storage_ref_[*probing_iter]; + auto const bucket_slots = this->storage_ref_[*probing_iter]; - for (int32_t i = 0; i < window_size; ++i) { + for (int32_t i = 0; i < bucket_size; ++i) { switch ( - this->predicate_.operator()(key, this->extract_key(window_slots[i]))) { + this->predicate_.operator()(key, this->extract_key(bucket_slots[i]))) { case detail::equal_result::EMPTY: { return; } case detail::equal_result::EQUAL: { - callback_op(window_slots[i]); + callback_op(bucket_slots[i]); continue; } default: continue; @@ -1344,22 +1344,22 @@ class open_addressing_ref_impl { ProbeKey const& key, CallbackOp&& callback_op) const noexcept { - auto probing_iter = this->probing_scheme_(group, key, this->storage_ref_.window_extent()); + auto probing_iter = this->probing_scheme_(group, key, this->storage_ref_.bucket_extent()); bool empty = false; while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = this->storage_ref_[*probing_iter]; + auto const bucket_slots = this->storage_ref_[*probing_iter]; - for (int32_t i = 0; i < window_size and !empty; ++i) { + for (int32_t i = 0; i < bucket_size and !empty; ++i) { switch ( - this->predicate_.operator()(key, this->extract_key(window_slots[i]))) { + this->predicate_.operator()(key, this->extract_key(bucket_slots[i]))) { case detail::equal_result::EMPTY: { empty = true; continue; } case detail::equal_result::EQUAL: { - callback_op(window_slots[i]); + callback_op(bucket_slots[i]); continue; } default: { @@ -1388,8 +1388,8 @@ class open_addressing_ref_impl { * * @note The `sync_op` function can be used to perform work that requires synchronizing threads in * `group` inbetween probing steps, where the number of probing steps performed between - * synchronization points is capped by `window_size * cg_size`. The functor will be called right - * after the current probing window has been traversed. + * synchronization points is capped by `bucket_size * cg_size`. The functor will be called right + * after the current probing bucket has been traversed. * * @tparam ProbeKey Probe key type * @tparam CallbackOp Type of unary callback function object @@ -1398,7 +1398,7 @@ class open_addressing_ref_impl { * @param group The Cooperative Group used to perform this operation * @param key The key to search for * @param callback_op Function to apply to every matched slot - * @param sync_op Function that is allowed to synchronize `group` inbetween probing windows + * @param sync_op Function that is allowed to synchronize `group` inbetween probing buckets */ template __device__ void for_each(cooperative_groups::thread_block_tile const& group, @@ -1406,22 +1406,22 @@ class open_addressing_ref_impl { CallbackOp&& callback_op, SyncOp&& sync_op) const noexcept { - auto probing_iter = this->probing_scheme_(group, key, this->storage_ref_.window_extent()); + auto probing_iter = this->probing_scheme_(group, key, this->storage_ref_.bucket_extent()); bool empty = false; while (true) { // TODO atomic_ref::load if insert operator is present - auto const window_slots = this->storage_ref_[*probing_iter]; + auto const bucket_slots = this->storage_ref_[*probing_iter]; - for (int32_t i = 0; i < window_size and !empty; ++i) { + for (int32_t i = 0; i < bucket_size and !empty; ++i) { switch ( - this->predicate_.operator()(key, this->extract_key(window_slots[i]))) { + this->predicate_.operator()(key, this->extract_key(bucket_slots[i]))) { case detail::equal_result::EMPTY: { empty = true; continue; } case detail::equal_result::EQUAL: { - callback_op(window_slots[i]); + callback_op(bucket_slots[i]); continue; } default: { diff --git a/include/cuco/detail/probe_sequence_impl.cuh b/include/cuco/detail/probe_sequence_impl.cuh index 51b1bfd68..a732363da 100644 --- a/include/cuco/detail/probe_sequence_impl.cuh +++ b/include/cuco/detail/probe_sequence_impl.cuh @@ -242,7 +242,7 @@ class linear_probing_impl if constexpr (not uses_vector_load()) { return g.thread_rank(); } }(); - // Each CG accesses to a window of (`cg_size` * `vector_width`) + // Each CG accesses to a bucket of (`cg_size` * `vector_width`) // slots if vector-load is used or `cg_size` slots otherwise return &slots_[(hash_value + offset) % capacity_]; } diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 611f595c1..47b083372 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -539,14 +539,14 @@ __device__ bool static_map::device_mutable_view::i return false; } - auto const window_contains_available = g.ballot(slot_is_available); + auto const bucket_contains_available = g.ballot(slot_is_available); // we found an empty slot, but not the key we are inserting, so this must // be an empty slot into which we can insert the key - if (window_contains_available) { + if (bucket_contains_available) { // the first lane in the group with an empty slot will attempt the insert insert_result status{insert_result::CONTINUE}; - uint32_t src_lane = __ffs(window_contains_available) - 1; + uint32_t src_lane = __ffs(bucket_contains_available) - 1; if (g.thread_rank() == src_lane) { // One single CAS operation if `value_type` is packable @@ -572,10 +572,10 @@ __device__ bool static_map::device_mutable_view::i if (status == insert_result::DUPLICATE) { return false; } // if we've gotten this far, a different key took our spot // before we could insert. We need to retry the insert on the - // same window + // same bucket } - // if there are no empty slots in the current window, - // we move onto the next window + // if there are no empty slots in the current bucket, + // we move onto the next bucket else { current_slot = next_slot(g, current_slot); } @@ -766,8 +766,8 @@ static_map::device_view::find(CG g, // we found an empty slot, meaning that the key we're searching for isn't present if (g.ballot(slot_is_empty)) { return this->end(); } - // otherwise, all slots in the current window are full with other keys, so we move onto the - // next window + // otherwise, all slots in the current bucket are full with other keys, so we move onto the + // next bucket current_slot = next_slot(g, current_slot); } } @@ -805,8 +805,8 @@ static_map::device_view::find(CG g, // for isn't in this submap, so we should move onto the next one if (g.ballot(slot_is_empty)) { return this->end(); } - // otherwise, all slots in the current window are full with other keys, - // so we move onto the next window in the current submap + // otherwise, all slots in the current bucket are full with other keys, + // so we move onto the next bucket in the current submap current_slot = next_slot(g, current_slot); } @@ -855,8 +855,8 @@ static_map::device_view::contains(CG const& g, // we found an empty slot, meaning that the key we're searching for isn't present if (g.ballot(slot_is_empty)) { return false; } - // otherwise, all slots in the current window are full with other keys, so we move onto the - // next window + // otherwise, all slots in the current bucket are full with other keys, so we move onto the + // next bucket current_slot = next_slot(g, current_slot); } } diff --git a/include/cuco/detail/static_map/helpers.cuh b/include/cuco/detail/static_map/helpers.cuh index 04d19f842..9627f4c9c 100644 --- a/include/cuco/detail/static_map/helpers.cuh +++ b/include/cuco/detail/static_map/helpers.cuh @@ -76,7 +76,7 @@ void dispatch_insert_or_apply( cuco::storage<1>>; using shared_map_ref_type = typename shared_map_type::ref_type<>; - auto constexpr window_extent = cuco::make_window_extent(extent_type{}); + auto constexpr bucket_extent = cuco::make_bucket_extent(extent_type{}); auto insert_or_apply_shmem_fn_ptr = insert_or_apply_shmem 2) { insert_or_apply_shmem <<>>( - first, num, init, op, ref, window_extent); + first, num, init, op, ref, bucket_extent); } else { insert_or_apply <<>>( diff --git a/include/cuco/detail/static_map/kernels.cuh b/include/cuco/detail/static_map/kernels.cuh index 4e4f396db..5c468ba37 100644 --- a/include/cuco/detail/static_map/kernels.cuh +++ b/include/cuco/detail/static_map/kernels.cuh @@ -158,7 +158,7 @@ __global__ void insert_or_apply( * @param init The init value of the op * @param op Callable object to perform apply operation. * @param ref Non-owning container device ref used to access the slot storage - * @param window_extent Window Extent used for shared memory map slot storage + * @param bucket_extent Bucket Extent used for shared memory map slot storage */ template ; __shared__ atomic_type block_cardinality; @@ -234,9 +234,9 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem( } // insert-or-apply from shared map to global map - auto window_idx = thread_idx; - while (window_idx < num_windows) { - auto const slot = storage[window_idx][0]; + auto bucket_idx = thread_idx; + while (bucket_idx < num_buckets) { + auto const slot = storage[bucket_idx][0]; if (not cuco::detail::bitwise_compare(slot.first, ref.empty_key_sentinel())) { if constexpr (HasInit) { ref.insert_or_apply(slot, init, op); @@ -244,7 +244,7 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem( ref.insert_or_apply(slot, op); } } - window_idx += BlockSize; + bucket_idx += BlockSize; } // insert-or-apply into global map for the remaining elements whose block_cardinality diff --git a/include/cuco/detail/static_map/static_map.inl b/include/cuco/detail/static_map/static_map.inl index 16da68629..8b49c35d4 100644 --- a/include/cuco/detail/static_map/static_map.inl +++ b/include/cuco/detail/static_map/static_map.inl @@ -616,7 +616,7 @@ template ::rehash( size_type capacity, cuda::stream_ref stream) { - auto const extent = make_window_extent(capacity); + auto const extent = make_bucket_extent(capacity); this->impl_->rehash(extent, *this, stream); } @@ -645,7 +645,7 @@ template ::rehash_async( size_type capacity, cuda::stream_ref stream) { - auto const extent = make_window_extent(capacity); + auto const extent = make_bucket_extent(capacity); this->impl_->rehash_async(extent, *this, stream); } diff --git a/include/cuco/detail/static_map/static_map_ref.inl b/include/cuco/detail/static_map/static_map_ref.inl index 662667b3e..f06f03fdb 100644 --- a/include/cuco/detail/static_map/static_map_ref.inl +++ b/include/cuco/detail/static_map/static_map_ref.inl @@ -239,10 +239,10 @@ __host__ __device__ constexpr static_map_ref::extent_type -static_map_ref::window_extent() +static_map_ref::bucket_extent() const noexcept { - return impl_.window_extent(); + return impl_.bucket_extent(); } template __device__ constexpr auto static_map_ref::make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope) const noexcept { this->impl_.make_copy(tile, memory_to_use); @@ -378,7 +378,7 @@ static_map_ref this->key_eq(), this->probing_scheme(), scope, - storage_ref_type{this->window_extent(), memory_to_use}}; + storage_ref_type{this->bucket_extent(), memory_to_use}}; } template (key, slot_content.first); - auto const intra_window_index = thrust::distance(window_slots.begin(), &slot_content); - auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_window_index; + auto const intra_bucket_index = thrust::distance(bucket_slots.begin(), &slot_content); + auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_bucket_index; // If the key is already in the container, update the payload and return if (eq_res == detail::equal_result::EQUAL) { @@ -538,24 +538,24 @@ class operator_impl< auto const key = ref_.impl_.extract_key(val); auto& probing_scheme = ref_.impl_.probing_scheme(); auto storage_ref = ref_.impl_.storage_ref(); - auto probing_iter = probing_scheme(group, key, storage_ref.window_extent()); + auto probing_iter = probing_scheme(group, key, storage_ref.bucket_extent()); while (true) { - auto const window_slots = storage_ref[*probing_iter]; + auto const bucket_slots = storage_ref[*probing_iter]; - auto const [state, intra_window_index] = [&]() { + auto const [state, intra_bucket_index] = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto i = 0; i < window_size; ++i) { - res = ref_.impl_.predicate_.operator()(key, window_slots[i].first); + for (auto i = 0; i < bucket_size; ++i) { + res = ref_.impl_.predicate_.operator()(key, bucket_slots[i].first); if (res != detail::equal_result::UNEQUAL) { - return detail::window_probing_results{res, i}; + return detail::bucket_probing_results{res, i}; } } // returns dummy index `-1` for UNEQUAL - return detail::window_probing_results{res, -1}; + return detail::bucket_probing_results{res, -1}; }(); - auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_window_index; + auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_bucket_index; auto const group_contains_equal = group.ballot(state == detail::equal_result::EQUAL); if (group_contains_equal) { @@ -636,7 +636,7 @@ class operator_impl< using value_type = typename base_type::value_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -854,20 +854,20 @@ class operator_impl< auto const key = ref_.impl_.extract_key(val); auto& probing_scheme = ref_.impl_.probing_scheme(); auto storage_ref = ref_.impl_.storage_ref(); - auto probing_iter = probing_scheme(key, storage_ref.window_extent()); + auto probing_iter = probing_scheme(key, storage_ref.bucket_extent()); auto const empty_value = ref_.empty_value_sentinel(); // wait for payload only when init != sentinel and insert strategy is not `packed_cas` auto constexpr wait_for_payload = (not UseDirectApply) and (sizeof(value_type) > 8); while (true) { - auto const window_slots = storage_ref[*probing_iter]; + auto const bucket_slots = storage_ref[*probing_iter]; - for (auto& slot_content : window_slots) { + for (auto& slot_content : bucket_slots) { auto const eq_res = ref_.impl_.predicate_.operator()(key, slot_content.first); - auto const intra_window_index = thrust::distance(window_slots.begin(), &slot_content); - auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_window_index; + auto const intra_bucket_index = thrust::distance(bucket_slots.begin(), &slot_content); + auto slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_bucket_index; // If the key is already in the container, update the payload and return if (eq_res == detail::equal_result::EQUAL) { @@ -928,28 +928,28 @@ class operator_impl< auto const key = ref_.impl_.extract_key(val); auto& probing_scheme = ref_.impl_.probing_scheme(); auto storage_ref = ref_.impl_.storage_ref(); - auto probing_iter = probing_scheme(group, key, storage_ref.window_extent()); + auto probing_iter = probing_scheme(group, key, storage_ref.bucket_extent()); auto const empty_value = ref_.empty_value_sentinel(); // wait for payload only when init != sentinel and insert strategy is not `packed_cas` auto constexpr wait_for_payload = (not UseDirectApply) and (sizeof(value_type) > 8); while (true) { - auto const window_slots = storage_ref[*probing_iter]; + auto const bucket_slots = storage_ref[*probing_iter]; - auto const [state, intra_window_index] = [&]() { + auto const [state, intra_bucket_index] = [&]() { auto res = detail::equal_result::UNEQUAL; - for (auto i = 0; i < window_size; ++i) { - res = ref_.impl_.predicate_.operator()(key, window_slots[i].first); + for (auto i = 0; i < bucket_size; ++i) { + res = ref_.impl_.predicate_.operator()(key, bucket_slots[i].first); if (res != detail::equal_result::UNEQUAL) { - return detail::window_probing_results{res, i}; + return detail::bucket_probing_results{res, i}; } } // returns dummy index `-1` for UNEQUAL - return detail::window_probing_results{res, -1}; + return detail::bucket_probing_results{res, -1}; }(); - auto* slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_window_index; + auto* slot_ptr = (storage_ref.data() + *probing_iter)->data() + intra_bucket_index; auto const group_contains_equal = group.ballot(state == detail::equal_result::EQUAL); if (group_contains_equal) { @@ -966,10 +966,10 @@ class operator_impl< auto const group_contains_available = group.ballot(state == detail::equal_result::AVAILABLE); if (group_contains_available) { auto const src_lane = __ffs(group_contains_available) - 1; - auto const status = [&, target_idx = intra_window_index]() { + auto const status = [&, target_idx = intra_bucket_index]() { if (group.thread_rank() != src_lane) { return insert_result::CONTINUE; } return ref_.attempt_insert_or_apply( - slot_ptr, window_slots[target_idx], val, op); + slot_ptr, bucket_slots[target_idx], val, op); }(); switch (group.shfl(status, src_lane)) { @@ -1070,7 +1070,7 @@ class operator_impl< using const_iterator = typename base_type::const_iterator; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -1134,7 +1134,7 @@ class operator_impl< using value_type = typename base_type::value_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -1188,7 +1188,7 @@ class operator_impl< using value_type = typename base_type::value_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -1251,7 +1251,7 @@ class operator_impl< using const_iterator = typename base_type::const_iterator; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -1314,7 +1314,7 @@ class operator_impl< using const_iterator = typename base_type::const_iterator; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -1383,7 +1383,7 @@ class operator_impl< using size_type = typename base_type::size_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** diff --git a/include/cuco/detail/static_multimap/device_view_impl.inl b/include/cuco/detail/static_multimap/device_view_impl.inl index 25aa0a63b..223ea8395 100644 --- a/include/cuco/detail/static_multimap/device_view_impl.inl +++ b/include/cuco/detail/static_multimap/device_view_impl.inl @@ -224,7 +224,7 @@ class static_multimap::device_mutab private: /** - * @brief Enumeration of the possible results of attempting to insert into a hash bucket. + * @brief Enumeration of the possible results of attempting to insert into a hash slot. */ enum class insert_result { CONTINUE, ///< Insert did not succeed, continue trying to insert @@ -363,12 +363,12 @@ class static_multimap::device_mutab (detail::bitwise_compare(arr[0].first, this->get_empty_key_sentinel())); auto const second_slot_is_empty = (detail::bitwise_compare(arr[1].first, this->get_empty_key_sentinel())); - auto const window_contains_empty = g.ballot(first_slot_is_empty or second_slot_is_empty); + auto const bucket_contains_empty = g.ballot(first_slot_is_empty or second_slot_is_empty); - if (window_contains_empty) { + if (bucket_contains_empty) { // the first lane in the group with an empty slot will attempt the insert insert_result status{insert_result::CONTINUE}; - uint32_t src_lane = __ffs(window_contains_empty) - 1; + uint32_t src_lane = __ffs(bucket_contains_empty) - 1; if (g.thread_rank() == src_lane) { auto insert_location = first_slot_is_empty ? current_slot : current_slot + 1; // One single CAS operation since vector loads are dedicated to packable pairs @@ -379,10 +379,10 @@ class static_multimap::device_mutab if (g.any(status == insert_result::SUCCESS)) { return; } // if we've gotten this far, a different key took our spot // before we could insert. We need to retry the insert on the - // same window + // same bucket } - // if there are no empty slots in the current window, - // we move onto the next window + // if there are no empty slots in the current bucket, + // we move onto the next bucket else { current_slot = next_slot(current_slot); } @@ -413,12 +413,12 @@ class static_multimap::device_mutab // the sentinel is not a valid key value. Therefore, first check for the sentinel auto const slot_is_empty = detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); - auto const window_contains_empty = g.ballot(slot_is_empty); + auto const bucket_contains_empty = g.ballot(slot_is_empty); - if (window_contains_empty) { + if (bucket_contains_empty) { // the first lane in the group with an empty slot will attempt the insert insert_result status{insert_result::CONTINUE}; - uint32_t src_lane = __ffs(window_contains_empty) - 1; + uint32_t src_lane = __ffs(bucket_contains_empty) - 1; if (g.thread_rank() == src_lane) { #if (__CUDA_ARCH__ < 700) @@ -432,10 +432,10 @@ class static_multimap::device_mutab if (g.any(status == insert_result::SUCCESS)) { return; } // if we've gotten this far, a different key took our spot // before we could insert. We need to retry the insert on the - // same window + // same bucket } - // if there are no empty slots in the current window, - // we move onto the next window + // if there are no empty slots in the current bucket, + // we move onto the next bucket else { current_slot = next_slot(current_slot); } @@ -627,8 +627,8 @@ class static_multimap::device_view_ // we found an empty slot, meaning that the key we're searching for isn't present if (g.any(first_slot_is_empty or second_slot_is_empty)) { return false; } - // otherwise, all slots in the current window are full with other keys, so we move onto the - // next window + // otherwise, all slots in the current bucket are full with other keys, so we move onto the + // next bucket current_slot = next_slot(current_slot); } } @@ -686,8 +686,8 @@ class static_multimap::device_view_ // we found an empty slot, meaning that the key we're searching for isn't present if (g.any(slot_is_empty)) { return false; } - // otherwise, all slots in the current window are full with other keys, so we move onto the - // next window + // otherwise, all slots in the current bucket are full with other keys, so we move onto the + // next bucket current_slot = next_slot(current_slot); } } @@ -1203,7 +1203,7 @@ class static_multimap::device_view_ *(contained_val_begin) = this->get_empty_value_sentinel(); } } - return; // exit if any slot in the current window is empty + return; // exit if any slot in the current bucket is empty } current_slot = next_slot(current_slot); @@ -1302,7 +1302,7 @@ class static_multimap::device_view_ *(contained_val_begin) = this->get_empty_value_sentinel(); } } - return; // exit if any slot in the current window is empty + return; // exit if any slot in the current bucket is empty } current_slot = next_slot(current_slot); diff --git a/include/cuco/detail/static_multimap/static_multimap_ref.inl b/include/cuco/detail/static_multimap/static_multimap_ref.inl index 85e770527..a38ddf19d 100644 --- a/include/cuco/detail/static_multimap/static_multimap_ref.inl +++ b/include/cuco/detail/static_multimap/static_multimap_ref.inl @@ -240,9 +240,9 @@ __host__ __device__ constexpr static_multimap_ref::extent_type static_multimap_ref:: - window_extent() const noexcept + bucket_extent() const noexcept { - return impl_.window_extent(); + return impl_.bucket_extent(); } template __device__ constexpr auto static_multimap_ref::make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope) const noexcept { impl_.make_copy(tile, memory_to_use); @@ -383,7 +383,7 @@ static_multimap_refkey_eq(), impl_.probing_scheme(), scope, - storage_ref_type{this->window_extent(), memory_to_use}}; + storage_ref_type{this->bucket_extent(), memory_to_use}}; } template __device__ void for_each(cooperative_groups::thread_block_tile const& group, @@ -649,7 +649,7 @@ class operator_impl< using const_iterator = typename base_type::const_iterator; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** @@ -712,7 +712,7 @@ class operator_impl< using size_type = typename base_type::size_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** diff --git a/include/cuco/detail/static_multiset/static_multiset_ref.inl b/include/cuco/detail/static_multiset/static_multiset_ref.inl index 650664569..1a059d0a6 100644 --- a/include/cuco/detail/static_multiset/static_multiset_ref.inl +++ b/include/cuco/detail/static_multiset/static_multiset_ref.inl @@ -177,10 +177,10 @@ __host__ __device__ constexpr static_multiset_ref::extent_type -static_multiset_ref::window_extent() +static_multiset_ref::bucket_extent() const noexcept { - return impl_.window_extent(); + return impl_.bucket_extent(); } template __device__ void for_each(cooperative_groups::thread_block_tile const& group, @@ -721,7 +721,7 @@ class operator_impl< using size_type = typename base_type::size_type; static constexpr auto cg_size = base_type::cg_size; - static constexpr auto window_size = base_type::window_size; + static constexpr auto bucket_size = base_type::bucket_size; public: /** diff --git a/include/cuco/detail/static_set/kernels.cuh b/include/cuco/detail/static_set/kernels.cuh index b0d866978..b3488094a 100644 --- a/include/cuco/detail/static_set/kernels.cuh +++ b/include/cuco/detail/static_set/kernels.cuh @@ -82,14 +82,14 @@ __device__ void group_retrieve(InputIt first, using Key = typename Ref::key_type; auto constexpr tile_size = Ref::cg_size; - auto constexpr window_size = Ref::window_size; + auto constexpr bucket_size = Ref::bucket_size; auto idx = cuco::detail::global_thread_id() / tile_size; auto const stride = cuco::detail::grid_stride() / tile_size; auto const block = cg::this_thread_block(); auto const tile = cg::tiled_partition(block); - auto constexpr flushing_tile_size = cuco::detail::warp_size() / window_size; + auto constexpr flushing_tile_size = cuco::detail::warp_size() / bucket_size; // random choice to tune auto constexpr flushing_buffer_size = 2 * flushing_tile_size; auto constexpr num_flushing_tiles = BlockSize / flushing_tile_size; diff --git a/include/cuco/detail/static_set/static_set.inl b/include/cuco/detail/static_set/static_set.inl index 7f53588ee..c2bd33637 100644 --- a/include/cuco/detail/static_set/static_set.inl +++ b/include/cuco/detail/static_set/static_set.inl @@ -502,7 +502,7 @@ template ::rehash( size_type capacity, cuda::stream_ref stream) { - auto const extent = make_window_extent(capacity); + auto const extent = make_bucket_extent(capacity); this->impl_->rehash(extent, *this, stream); } @@ -529,7 +529,7 @@ template ::rehash_async( size_type capacity, cuda::stream_ref stream) { - auto const extent = make_window_extent(capacity); + auto const extent = make_bucket_extent(capacity); this->impl_->rehash_async(extent, *this, stream); } diff --git a/include/cuco/detail/static_set/static_set_ref.inl b/include/cuco/detail/static_set/static_set_ref.inl index 123956a7d..990593738 100644 --- a/include/cuco/detail/static_set/static_set_ref.inl +++ b/include/cuco/detail/static_set/static_set_ref.inl @@ -210,10 +210,10 @@ __host__ __device__ constexpr static_set_ref::extent_type -static_set_ref::window_extent() +static_set_ref::bucket_extent() const noexcept { - return impl_.window_extent(); + return impl_.bucket_extent(); } template __device__ constexpr auto static_set_ref::make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope) const noexcept { this->impl_.make_copy(tile, memory_to_use); @@ -324,7 +324,7 @@ static_set_ref::m this->key_eq(), this->probing_scheme(), scope, - storage_ref_type{this->window_extent(), memory_to_use}}; + storage_ref_type{this->bucket_extent(), memory_to_use}}; } template -constexpr aow_storage::aow_storage(Extent size, - Allocator const& allocator) - : detail::aow_storage_base{size}, +template +constexpr bucket_storage::bucket_storage( + Extent size, Allocator const& allocator) + : detail::bucket_storage_base{size}, allocator_{allocator}, - window_deleter_{capacity(), allocator_}, - windows_{allocator_.allocate(capacity()), window_deleter_} + bucket_deleter_{capacity(), allocator_}, + buckets_{allocator_.allocate(capacity()), bucket_deleter_} { } -template -constexpr aow_storage::window_type* -aow_storage::data() const noexcept +template +constexpr bucket_storage::bucket_type* +bucket_storage::data() const noexcept { - return windows_.get(); + return buckets_.get(); } -template -constexpr aow_storage::allocator_type -aow_storage::allocator() const noexcept +template +constexpr bucket_storage::allocator_type +bucket_storage::allocator() const noexcept { return allocator_; } -template -constexpr aow_storage::ref_type -aow_storage::ref() const noexcept +template +constexpr bucket_storage::ref_type +bucket_storage::ref() const noexcept { - return ref_type{this->window_extent(), this->data()}; + return ref_type{this->bucket_extent(), this->data()}; } -template -void aow_storage::initialize(value_type key, - cuda::stream_ref stream) +template +void bucket_storage::initialize(value_type key, + cuda::stream_ref stream) { this->initialize_async(key, stream); stream.wait(); } -template -void aow_storage::initialize_async( +template +void bucket_storage::initialize_async( value_type key, cuda::stream_ref stream) noexcept { - if (this->num_windows() == 0) { return; } + if (this->num_buckets() == 0) { return; } auto constexpr cg_size = 1; auto constexpr stride = 4; - auto const grid_size = cuco::detail::grid_size(this->num_windows(), cg_size, stride); + auto const grid_size = cuco::detail::grid_size(this->num_buckets(), cg_size, stride); detail::initialize<<>>( - this->data(), this->num_windows(), key); + this->data(), this->num_buckets(), key); } -template -__host__ __device__ constexpr aow_storage_ref::aow_storage_ref( - Extent size, window_type* windows) noexcept - : detail::aow_storage_base{size}, windows_{windows} +template +__host__ __device__ constexpr bucket_storage_ref::bucket_storage_ref( + Extent size, bucket_type* buckets) noexcept + : detail::bucket_storage_base{size}, buckets_{buckets} { } -template -struct aow_storage_ref::iterator { +template +struct bucket_storage_ref::iterator { public: using iterator_category = std::input_iterator_tag; ///< iterator category using reference = value_type&; ///< iterator reference type @@ -166,40 +166,40 @@ struct aow_storage_ref::iterator { value_type* current_{}; ///< Pointer to the current slot }; -template -__device__ constexpr aow_storage_ref::iterator -aow_storage_ref::end() noexcept +template +__device__ constexpr bucket_storage_ref::iterator +bucket_storage_ref::end() noexcept { return iterator{reinterpret_cast(this->data() + this->capacity())}; } -template -__device__ constexpr aow_storage_ref::const_iterator -aow_storage_ref::end() const noexcept +template +__device__ constexpr bucket_storage_ref::const_iterator +bucket_storage_ref::end() const noexcept { return const_iterator{reinterpret_cast(this->data() + this->capacity())}; } -template -__device__ constexpr aow_storage_ref::window_type* -aow_storage_ref::data() noexcept +template +__device__ constexpr bucket_storage_ref::bucket_type* +bucket_storage_ref::data() noexcept { - return windows_; + return buckets_; } -template -__device__ constexpr aow_storage_ref::window_type* -aow_storage_ref::data() const noexcept +template +__device__ constexpr bucket_storage_ref::bucket_type* +bucket_storage_ref::data() const noexcept { - return windows_; + return buckets_; } -template -__device__ constexpr aow_storage_ref::window_type -aow_storage_ref::operator[](size_type index) const noexcept +template +__device__ constexpr bucket_storage_ref::bucket_type +bucket_storage_ref::operator[](size_type index) const noexcept { - return *reinterpret_cast( - __builtin_assume_aligned(this->data() + index, sizeof(value_type) * window_size)); + return *reinterpret_cast( + __builtin_assume_aligned(this->data() + index, sizeof(value_type) * bucket_size)); } } // namespace cuco diff --git a/include/cuco/detail/storage/aow_storage_base.cuh b/include/cuco/detail/storage/bucket_storage_base.cuh similarity index 53% rename from include/cuco/detail/storage/aow_storage_base.cuh rename to include/cuco/detail/storage/bucket_storage_base.cuh index 06104c39f..4bf11feda 100644 --- a/include/cuco/detail/storage/aow_storage_base.cuh +++ b/include/cuco/detail/storage/bucket_storage_base.cuh @@ -26,55 +26,56 @@ namespace cuco { namespace detail { /** - * @brief Window data structure type + * @brief Bucket data structure type  * - * @tparam T Window slot type - * @tparam WindowSize Number of elements per window + * @tparam T Bucket slot type + * @tparam BucketSize Number of elements per bucket  */ -template -struct window : public cuda::std::array { +template +struct bucket : public cuda::std::array { public: - static int32_t constexpr window_size = WindowSize; ///< Number of slots per window + static int32_t constexpr bucket_size = BucketSize; ///< Number of slots per bucket }; /** - * @brief Base class of array of slot windows open addressing storage. + * @brief Base class of array of slot buckets open addressing storage. * * @note This should NOT be used directly. * * @tparam T Slot type - * @tparam WindowSize Number of slots in each window - * @tparam Extent Type of extent denoting the number of windows + * @tparam BucketSize Number of slots in each bucket + * @tparam Extent Type of extent denoting the number of buckets */ -template -class aow_storage_base : public storage_base { +template +class bucket_storage_base : public storage_base { public: /** - * @brief The number of elements (slots) processed per window. + * @brief The number of elements (slots) processed per bucket. */ - static constexpr int32_t window_size = WindowSize; + static constexpr int32_t bucket_size = BucketSize; using extent_type = typename storage_base::extent_type; ///< Storage extent type using size_type = typename storage_base::size_type; ///< Storage size type using value_type = T; ///< Slot type - using window_type = window; ///< Slot window type + using bucket_type = bucket; ///< Slot bucket type /** - * @brief Constructor of AoW base storage. + * @brief Constructor of array of bucket base storage. * - * @param size Number of windows to store + * @param size Number of buckets to store */ - __host__ __device__ explicit constexpr aow_storage_base(Extent size) : storage_base{size} + __host__ __device__ explicit constexpr bucket_storage_base(Extent size) + : storage_base{size} { } /** - * @brief Gets the total number of slot windows in the current storage. + * @brief Gets the total number of slot buckets in the current storage. * - * @return The total number of slot windows + * @return The total number of slot buckets */ - [[nodiscard]] __host__ __device__ constexpr size_type num_windows() const noexcept + [[nodiscard]] __host__ __device__ constexpr size_type num_buckets() const noexcept { return storage_base::capacity(); } @@ -86,15 +87,15 @@ class aow_storage_base : public storage_base { */ [[nodiscard]] __host__ __device__ constexpr size_type capacity() const noexcept { - return storage_base::capacity() * window_size; + return storage_base::capacity() * bucket_size; } /** - * @brief Gets the window extent of the current storage. + * @brief Gets the bucket extent of the current storage. * - * @return The window extent. + * @return The bucket extent. */ - [[nodiscard]] __host__ __device__ constexpr extent_type window_extent() const noexcept + [[nodiscard]] __host__ __device__ constexpr extent_type bucket_extent() const noexcept { return storage_base::extent(); } diff --git a/include/cuco/detail/storage/kernels.cuh b/include/cuco/detail/storage/kernels.cuh index 55e73bb6f..b2f425071 100644 --- a/include/cuco/detail/storage/kernels.cuh +++ b/include/cuco/detail/storage/kernels.cuh @@ -25,26 +25,26 @@ namespace detail { CUCO_SUPPRESS_KERNEL_WARNINGS /** - * @brief Initializes each slot in the window storage to contain `value`. + * @brief Initializes each slot in the bucket storage to contain `value`. * - * @tparam WindowT Window type + * @tparam BucketT Bucket type * - * @param windows Pointer to flat storage for windows - * @param n Number of input windows + * @param buckets Pointer to flat storage for buckets + * @param n Number of input buckets * @param value Value to which all values in `slots` are initialized */ -template -CUCO_KERNEL void initialize(WindowT* windows, +template +CUCO_KERNEL void initialize(BucketT* buckets, cuco::detail::index_type n, - typename WindowT::value_type value) + typename BucketT::value_type value) { auto const loop_stride = cuco::detail::grid_stride(); auto idx = cuco::detail::global_thread_id(); while (idx < n) { - auto& window_slots = *(windows + idx); + auto& bucket_slots = *(buckets + idx); #pragma unroll - for (auto& slot : window_slots) { + for (auto& slot : bucket_slots) { slot = value; } idx += loop_stride; diff --git a/include/cuco/detail/storage/storage.cuh b/include/cuco/detail/storage/storage.cuh index 2cd82a1e4..33c866390 100644 --- a/include/cuco/detail/storage/storage.cuh +++ b/include/cuco/detail/storage/storage.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include namespace cuco { namespace detail { @@ -25,7 +25,7 @@ namespace detail { * * @tparam StorageImpl Storage implementation class * @tparam T Storage element type - * @tparam Extent Type of extent denoting number of windows + * @tparam Extent Type of extent denoting number of buckets * @tparam Allocator Type of allocator used for device storage */ template @@ -37,17 +37,17 @@ class storage : StorageImpl::template impl { using value_type = typename impl_type::value_type; ///< Storage value type using allocator_type = typename impl_type::allocator_type; ///< Storage value type - /// Number of elements per window - static constexpr int window_size = impl_type::window_size; + /// Number of elements per bucket + static constexpr int bucket_size = impl_type::bucket_size; using impl_type::allocator; + using impl_type::bucket_extent; using impl_type::capacity; using impl_type::data; using impl_type::initialize; using impl_type::initialize_async; - using impl_type::num_windows; + using impl_type::num_buckets; using impl_type::ref; - using impl_type::window_extent; /** * @brief Constructs storage. diff --git a/include/cuco/extent.cuh b/include/cuco/extent.cuh index 3fdb5a572..157aff9e4 100644 --- a/include/cuco/extent.cuh +++ b/include/cuco/extent.cuh @@ -73,9 +73,9 @@ struct extent { }; /** - * @brief Window extent strong type. + * @brief Bucket extent strong type. * - * @note This type is used internally and can only be constructed using the `make_window_extent' + * @note This type is used internally and can only be constructed using the `make_bucket_extent' * factory method. * * @tparam SizeType Size type @@ -83,18 +83,23 @@ struct extent { * */ template -struct window_extent; +struct bucket_extent; + +/// Alias for bucket_extent +template +using window_extent = bucket_extent; /** - * @brief Computes a valid window extent/capacity for a given container type. + * @brief Computes valid bucket extent based on given parameters. * * @note The actual capacity of a container (map/set) should be exclusively determined by the return * value of this utility since the output depends on the requested low-bound size, the probing * scheme, and the storage. This utility is used internally during container constructions while for * container ref constructions, it would be users' responsibility to use this function to determine - * the capacity ctor argument for the container. + * the input size of the ref. * - * @tparam Container Container type to compute the extent for + * @tparam CGSize Number of elements handled per CG + * @tparam BucketSize Number of elements handled per Bucket * @tparam SizeType Size type * @tparam N Extent * @@ -102,13 +107,32 @@ struct window_extent; * * @throw If the input extent is invalid * - * @return Resulting valid `window extent` + * @return Resulting valid extent */ -template +template +[[nodiscard]] auto constexpr make_bucket_extent(extent ext); + +/** + * @brief Computes valid bucket extent based on given parameters. + * + * @deprecated Use the equivalent `make_bucket_extent` instead. + * + * @tparam CGSize Number of elements handled per CG + * @tparam BucketSize Number of elements handled per Bucket + * @tparam SizeType Size type + * @tparam N Extent + * + * @param ext The input extent + * + * @throw If the input extent is invalid + * + * @return Resulting valid extent + */ +template [[nodiscard]] auto constexpr make_window_extent(extent ext); /** - * @brief Computes a valid capacity for a given container type. + * @brief Computes valid bucket extent/capacity based on given parameters. * * @note The actual capacity of a container (map/set) should be exclusively determined by the return * value of this utility since the output depends on the requested low-bound size, the probing @@ -116,7 +140,8 @@ template * container ref constructions, it would be users' responsibility to use this function to determine * the capacity ctor argument for the container. * - * @tparam Container Container type to compute the extent for + * @tparam CGSize Number of elements handled per CG + * @tparam BucketSize Number of elements handled per Bucket * @tparam SizeType Size type * * @param size The input size @@ -125,20 +150,43 @@ template * * @return Resulting valid extent */ -template +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType size); + +/** + * @brief Computes valid bucket extent/capacity based on given parameters. + * + * @deprecated Use the equivalent `make_bucket_extent` instead. + * + * @tparam CGSize Number of elements handled per CG + * @tparam BucketSize Number of elements handled per Bucket + * @tparam SizeType Size type + * + * @param size The input size + * + * @throw If the input size is invalid + * + * @return Resulting valid extent + */ +template [[nodiscard]] auto constexpr make_window_extent(SizeType size); +template +[[nodiscard]] auto constexpr make_bucket_extent(cuco::extent ext); + +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType ext); + /** - * @brief Computes valid window extent based on given parameters. + * @brief Computes a valid bucket extent/capacity for a given container type. * * @note The actual capacity of a container (map/set) should be exclusively determined by the return * value of this utility since the output depends on the requested low-bound size, the probing * scheme, and the storage. This utility is used internally during container constructions while for * container ref constructions, it would be users' responsibility to use this function to determine - * the input size of the ref. + * the capacity ctor argument for the container. * - * @tparam CGSize Number of elements handled per CG - * @tparam WindowSize Number of elements handled per Window + * @tparam Container Container type to compute the extent for * @tparam SizeType Size type * @tparam N Extent * @@ -146,13 +194,31 @@ template * * @throw If the input extent is invalid * - * @return Resulting valid extent + * @return Resulting valid `bucket extent` + */ +template +[[nodiscard]] auto constexpr make_bucket_extent(extent ext); + +/** + * @brief Computes a valid bucket extent/capacity for a given container type. + * + * @deprecated Use the equivalent `make_bucket_extent` instead. + * + * @tparam Container Container type to compute the extent for + * @tparam SizeType Size type + * @tparam N Extent + * + * @param ext The input extent + * + * @throw If the input extent is invalid + * + * @return Resulting valid `bucket extent` */ -template +template [[nodiscard]] auto constexpr make_window_extent(extent ext); /** - * @brief Computes valid window extent/capacity based on given parameters. + * @brief Computes a valid capacity for a given container type. * * @note The actual capacity of a container (map/set) should be exclusively determined by the return * value of this utility since the output depends on the requested low-bound size, the probing @@ -160,8 +226,24 @@ template * container ref constructions, it would be users' responsibility to use this function to determine * the capacity ctor argument for the container. * - * @tparam CGSize Number of elements handled per CG - * @tparam WindowSize Number of elements handled per Window + * @tparam Container Container type to compute the extent for + * @tparam SizeType Size type + * + * @param size The input size + * + * @throw If the input size is invalid + * + * @return Resulting valid extent + */ +template +[[nodiscard]] auto constexpr make_bucket_extent(SizeType size); + +/** + * @brief Computes a valid capacity for a given container type. + * + * @deprecated Use the equivalent `make_bucket_extent` instead. + * + * @tparam Container Container type to compute the extent for * @tparam SizeType Size type * * @param size The input size @@ -170,7 +252,7 @@ template * * @return Resulting valid extent */ -template +template [[nodiscard]] auto constexpr make_window_extent(SizeType size); } // namespace cuco diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 48db81d04..8b6b92e3a 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -83,7 +83,7 @@ namespace cuco { * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage - * @tparam Storage Slot window storage type + * @tparam Storage Slot bucket storage type */ template [[nodiscard]] __device__ constexpr auto make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope = {}) const noexcept; /** diff --git a/include/cuco/static_multimap.cuh b/include/cuco/static_multimap.cuh index f8067405b..e6e2a139e 100644 --- a/include/cuco/static_multimap.cuh +++ b/include/cuco/static_multimap.cuh @@ -86,7 +86,7 @@ namespace experimental { * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage - * @tparam Storage Slot window storage type + * @tparam Storage Slot bucket storage type */ template [[nodiscard]] __device__ constexpr auto make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope = {}) const noexcept; /** diff --git a/include/cuco/static_multiset.cuh b/include/cuco/static_multiset.cuh index 4cd5277d5..6a3843329 100644 --- a/include/cuco/static_multiset.cuh +++ b/include/cuco/static_multiset.cuh @@ -72,7 +72,7 @@ namespace cuco { * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage - * @tparam Storage Slot window storage type + * @tparam Storage Slot bucket storage type */ template , @@ -88,7 +88,7 @@ class static_multiset { public: static constexpr auto cg_size = impl_type::cg_size; ///< CG size used for probing - static constexpr auto window_size = impl_type::window_size; ///< Window size used for probing + static constexpr auto bucket_size = impl_type::bucket_size; ///< Bucket size used for probing static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope using key_type = typename impl_type::key_type; ///< Key type @@ -97,7 +97,7 @@ class static_multiset { using size_type = typename impl_type::size_type; ///< Size type using key_equal = typename impl_type::key_equal; ///< Key equality comparator type using allocator_type = typename impl_type::allocator_type; ///< Allocator type - /// Non-owning window storage ref type + /// Non-owning bucket storage ref type using storage_ref_type = typename impl_type::storage_ref_type; using probing_scheme_type = typename impl_type::probing_scheme_type; ///< Probing scheme type using hasher = typename probing_scheme_type::hasher; ///< Hash function type @@ -128,7 +128,7 @@ class static_multiset { * values and CUDA stream * * The actual multiset capacity depends on the given `capacity`, the probing scheme, CG size, and - * the window size and it is computed via the `make_window_extent` factory. Insert operations will + * the bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will * not automatically grow the set. Attempting to insert more unique keys than the capacity of the * multiset results in undefined behavior. * @@ -162,7 +162,7 @@ class static_multiset { * the desired load factor without manually computing the desired capacity. The actual set * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG - * size, and the window size and is computed via the `make_window_extent` factory. + * size, and the bucket size and is computed via the `make_bucket_extent` factory. * @note Insert operations will not automatically grow the container. * @note Attempting to insert more unique keys than the capacity of the container results in * undefined behavior. @@ -201,7 +201,7 @@ class static_multiset { * and CUDA stream. * * The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the - * window size and it is computed via the `make_window_extent` factory. Insert operations will not + * bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will not * automatically grow the set. Attempting to insert more unique keys than the capacity of the * multiset results in undefined behavior. * diff --git a/include/cuco/static_multiset_ref.cuh b/include/cuco/static_multiset_ref.cuh index 96365570c..203cdc8b5 100644 --- a/include/cuco/static_multiset_ref.cuh +++ b/include/cuco/static_multiset_ref.cuh @@ -76,7 +76,8 @@ class static_multiset_ref using probing_scheme_type = ProbingScheme; ///< Type of probing scheme using hasher = typename probing_scheme_type::hasher; ///< Hash function type using storage_ref_type = StorageRef; ///< Type of storage ref - using window_type = typename storage_ref_type::window_type; ///< Window type + using bucket_type = typename storage_ref_type::bucket_type; ///< Bucket type + using window_type = bucket_type; ///< Bucket type using value_type = typename storage_ref_type::value_type; ///< Storage element type using extent_type = typename storage_ref_type::extent_type; ///< Extent type using size_type = typename storage_ref_type::size_type; ///< Probing scheme size type @@ -85,8 +86,8 @@ class static_multiset_ref using const_iterator = typename storage_ref_type::const_iterator; ///< Const slot iterator type static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size - static constexpr auto window_size = - storage_ref_type::window_size; ///< Number of elements handled per window + static constexpr auto bucket_size = + storage_ref_type::bucket_size; ///< Number of elements handled per bucket static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope /** @@ -143,11 +144,11 @@ class static_multiset_ref [[nodiscard]] __host__ __device__ constexpr auto capacity() const noexcept; /** - * @brief Gets the window extent of the current storage. + * @brief Gets the bucket extent of the current storage. * - * @return The window extent. + * @return The bucket extent. */ - [[nodiscard]] __host__ __device__ constexpr extent_type window_extent() const noexcept; + [[nodiscard]] __host__ __device__ constexpr extent_type bucket_extent() const noexcept; /** * @brief Gets the sentinel value used to represent an empty key slot. diff --git a/include/cuco/static_set.cuh b/include/cuco/static_set.cuh index d5f7acb95..ce2f799b0 100644 --- a/include/cuco/static_set.cuh +++ b/include/cuco/static_set.cuh @@ -77,7 +77,7 @@ namespace cuco { * @tparam KeyEqual Binary callable type used to compare two keys for equality * @tparam ProbingScheme Probing scheme (see `include/cuco/probing_scheme.cuh` for choices) * @tparam Allocator Type of allocator used for device storage - * @tparam Storage Slot window storage type + * @tparam Storage Slot bucket storage type */ template , @@ -93,7 +93,7 @@ class static_set { public: static constexpr auto cg_size = impl_type::cg_size; ///< CG size used for probing - static constexpr auto window_size = impl_type::window_size; ///< Window size used for probing + static constexpr auto bucket_size = impl_type::bucket_size; ///< Bucket size used for probing static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope using key_type = typename impl_type::key_type; ///< Key type @@ -102,7 +102,7 @@ class static_set { using size_type = typename impl_type::size_type; ///< Size type using key_equal = typename impl_type::key_equal; ///< Key equality comparator type using allocator_type = typename impl_type::allocator_type; ///< Allocator type - /// Non-owning window storage ref type + /// Non-owning bucket storage ref type using storage_ref_type = typename impl_type::storage_ref_type; using probing_scheme_type = typename impl_type::probing_scheme_type; ///< Probing scheme type using hasher = typename probing_scheme_type::hasher; ///< Hash function type @@ -133,7 +133,7 @@ class static_set { * and CUDA stream * * The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the - * window size and it is computed via the `make_window_extent` factory. Insert operations will not + * bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will not * automatically grow the set. Attempting to insert more unique keys than the capacity of the set * results in undefined behavior. * @@ -167,7 +167,7 @@ class static_set { * the desired load factor without manually computing the desired capacity. The actual set * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG - * size, and the window size and is computed via the `make_window_extent` factory. + * size, and the bucket size and is computed via the `make_bucket_extent` factory. * @note Insert operations will not automatically grow the container. * @note Attempting to insert more unique keys than the capacity of the container results in * undefined behavior. @@ -206,7 +206,7 @@ class static_set { * and CUDA stream. * * The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the - * window size and it is computed via the `make_window_extent` factory. Insert operations will not + * bucket size and it is computed via the `make_bucket_extent` factory. Insert operations will not * automatically grow the set. Attempting to insert more unique keys than the capacity of the set * results in undefined behavior. * diff --git a/include/cuco/static_set_ref.cuh b/include/cuco/static_set_ref.cuh index ae24d1ae6..846ad9252 100644 --- a/include/cuco/static_set_ref.cuh +++ b/include/cuco/static_set_ref.cuh @@ -76,7 +76,8 @@ class static_set_ref using probing_scheme_type = ProbingScheme; ///< Type of probing scheme using hasher = typename probing_scheme_type::hasher; ///< Hash function type using storage_ref_type = StorageRef; ///< Type of storage ref - using window_type = typename storage_ref_type::window_type; ///< Window type + using bucket_type = typename storage_ref_type::bucket_type; ///< Bucket type + using window_type = bucket_type; ///< Bucket type using value_type = typename storage_ref_type::value_type; ///< Storage element type using extent_type = typename storage_ref_type::extent_type; ///< Extent type using size_type = typename storage_ref_type::size_type; ///< Probing scheme size type @@ -85,8 +86,8 @@ class static_set_ref using const_iterator = typename storage_ref_type::const_iterator; ///< Const slot iterator type static constexpr auto cg_size = probing_scheme_type::cg_size; ///< Cooperative group size - static constexpr auto window_size = - storage_ref_type::window_size; ///< Number of elements handled per window + static constexpr auto bucket_size = + storage_ref_type::bucket_size; ///< Number of elements handled per bucket static constexpr auto thread_scope = impl_type::thread_scope; ///< CUDA thread scope /** @@ -141,11 +142,11 @@ class static_set_ref [[nodiscard]] __host__ __device__ constexpr auto capacity() const noexcept; /** - * @brief Gets the window extent of the current storage. + * @brief Gets the bucket extent of the current storage. * - * @return The window extent. + * @return The bucket extent. */ - [[nodiscard]] __host__ __device__ constexpr extent_type window_extent() const noexcept; + [[nodiscard]] __host__ __device__ constexpr extent_type bucket_extent() const noexcept; /** * @brief Gets the sentinel value used to represent an empty key slot. @@ -263,7 +264,7 @@ class static_set_ref template [[nodiscard]] __device__ constexpr auto make_copy( CG const& tile, - window_type* const memory_to_use, + bucket_type* const memory_to_use, cuda_thread_scope scope = {}) const noexcept; /** diff --git a/include/cuco/storage.cuh b/include/cuco/storage.cuh index a1e591e5c..c9da5ca3c 100644 --- a/include/cuco/storage.cuh +++ b/include/cuco/storage.cuh @@ -23,26 +23,26 @@ namespace cuco { /** * @brief Public storage class. * - * @note This is a public interface used to control storage window size. A window consists of a - * number of contiguous slots. The window size defines the workload granularity for each CUDA + * @note This is a public interface used to control storage bucket size. A bucket consists of one + * or multiple contiguous slots. The bucket size defines the workload granularity for each CUDA * thread, i.e., how many slots a thread would concurrently operate on when performing modify or - * lookup operations. cuCollections uses the AoW storage to supersede the raw flat slot storage due - * to its superior granularity control: When window size equals one, AoW performs the same as the - * flat storage. If the underlying operation is more memory bandwidth bound, e.g., high occupancy - * multimap operations, a larger window size can reduce the length of probing sequences thus improve - * runtime performance. + * lookup operations. cuCollections uses the array of bucket storage to supersede the raw flat slot + * storage due to its superior granularity control: When bucket size equals one, array of buckets + * performs the same as the flat storage. If the underlying operation is more memory bandwidth + * bound, e.g., high occupancy multimap operations, a larger bucket size can reduce the length of + * probing sequences thus improve runtime performance. * - * @tparam WindowSize Number of elements per window storage + * @tparam BucketSize Number of elements per bucket storage */ -template +template class storage { public: - /// Number of slots per window storage - static constexpr int32_t window_size = WindowSize; + /// Number of slots per bucket storage + static constexpr int32_t bucket_size = BucketSize; /// Type of implementation details template - using impl = aow_storage; + using impl = bucket_storage; }; } // namespace cuco diff --git a/tests/static_map/capacity_test.cu b/tests/static_map/capacity_test.cu index fb1ae7491..db93006d7 100644 --- a/tests/static_map/capacity_test.cu +++ b/tests/static_map/capacity_test.cu @@ -73,7 +73,7 @@ TEST_CASE("Static map capacity", "") constexpr std::size_t num_keys{400}; - SECTION("Static window extent can be evaluated at build time.") + SECTION("Static bucket extent can be evaluated at build time.") { std::size_t constexpr gold_extent = 211; @@ -89,8 +89,8 @@ TEST_CASE("Static map capacity", "") map{extent_type{}, cuco::empty_key{-1}, cuco::empty_value{-1}}; auto ref = map.ref(cuco::insert); - auto const num_windows = ref.window_extent(); - STATIC_REQUIRE(static_cast(num_windows) == gold_extent); + auto const num_buckets = ref.bucket_extent(); + STATIC_REQUIRE(static_cast(num_buckets) == gold_extent); } SECTION("Dynamic extent is evaluated at run time.") diff --git a/tests/static_set/capacity_test.cu b/tests/static_set/capacity_test.cu index acfa8d5d8..433e25287 100644 --- a/tests/static_set/capacity_test.cu +++ b/tests/static_set/capacity_test.cu @@ -60,7 +60,7 @@ TEST_CASE("Static set capacity", "") constexpr std::size_t num_keys{400}; - SECTION("Static window extent can be evaluated at build time.") + SECTION("Static bucket extent can be evaluated at build time.") { std::size_t constexpr gold_extent = 211; @@ -70,8 +70,8 @@ TEST_CASE("Static set capacity", "") set{extent_type{}, cuco::empty_key{-1}}; auto ref = set.ref(cuco::insert); - auto const num_windows = ref.window_extent(); - STATIC_REQUIRE(static_cast(num_windows) == gold_extent); + auto const num_buckets = ref.bucket_extent(); + STATIC_REQUIRE(static_cast(num_buckets) == gold_extent); } SECTION("Dynamic extent is evaluated at run time.") diff --git a/tests/utility/storage_test.cu b/tests/utility/storage_test.cu index 8fb923fb8..74da3ffd7 100644 --- a/tests/utility/storage_test.cu +++ b/tests/utility/storage_test.cu @@ -16,7 +16,7 @@ #include -#include +#include #include #include #include @@ -31,7 +31,7 @@ TEMPLATE_TEST_CASE_SIG("Storage tests", (int64_t, int64_t)) { constexpr std::size_t size{1'000}; - constexpr int window_size{2}; + constexpr int bucket_size{2}; constexpr std::size_t gold_capacity{2'000}; using allocator_type = cuco::cuda_allocator; @@ -40,7 +40,7 @@ TEMPLATE_TEST_CASE_SIG("Storage tests", SECTION("Initialize empty storage is allowed.") { auto s = cuco:: - aow_storage, window_size, cuco::extent, allocator_type>{ + aow_storage, bucket_size, cuco::extent, allocator_type>{ cuco::extent{0}, allocator}; s.initialize(cuco::pair{1, 1}); @@ -49,35 +49,35 @@ TEMPLATE_TEST_CASE_SIG("Storage tests", SECTION("Allocate array of pairs with AoS storage.") { auto s = cuco:: - aow_storage, window_size, cuco::extent, allocator_type>( + aow_storage, bucket_size, cuco::extent, allocator_type>( cuco::extent{size}, allocator); - auto const num_windows = s.num_windows(); + auto const num_buckets = s.num_buckets(); auto const capacity = s.capacity(); - REQUIRE(num_windows == size); + REQUIRE(num_buckets == size); REQUIRE(capacity == gold_capacity); } SECTION("Allocate array of pairs with AoS storage with static extent.") { using extent_type = cuco::extent; - auto s = cuco::aow_storage, window_size, extent_type, allocator_type>( + auto s = cuco::aow_storage, bucket_size, extent_type, allocator_type>( extent_type{}, allocator); - auto const num_windows = s.num_windows(); + auto const num_buckets = s.num_buckets(); auto const capacity = s.capacity(); - STATIC_REQUIRE(num_windows == size); + STATIC_REQUIRE(num_buckets == size); STATIC_REQUIRE(capacity == gold_capacity); } SECTION("Allocate array of keys with AoS storage.") { - auto s = cuco::aow_storage, allocator_type>( + auto s = cuco::aow_storage, allocator_type>( cuco::extent{size}, allocator); - auto const num_windows = s.num_windows(); + auto const num_buckets = s.num_buckets(); auto const capacity = s.capacity(); - REQUIRE(num_windows == size); + REQUIRE(num_buckets == size); REQUIRE(capacity == gold_capacity); } @@ -85,11 +85,11 @@ TEMPLATE_TEST_CASE_SIG("Storage tests", { using extent_type = cuco::extent; auto s = - cuco::aow_storage(extent_type{}, allocator); - auto const num_windows = s.num_windows(); + cuco::aow_storage(extent_type{}, allocator); + auto const num_buckets = s.num_buckets(); auto const capacity = s.capacity(); - STATIC_REQUIRE(num_windows == size); + STATIC_REQUIRE(num_buckets == size); STATIC_REQUIRE(capacity == gold_capacity); } }