diff --git a/include/cuco/detail/dynamic_map/dynamic_map.inl b/include/cuco/detail/dynamic_map/dynamic_map.inl new file mode 100644 index 000000000..4d689bbba --- /dev/null +++ b/include/cuco/detail/dynamic_map/dynamic_map.inl @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cuco { +namespace experimental { + +template +constexpr dynamic_map:: + dynamic_map(Extent initial_capacity, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + cuda_thread_scope scope, + Storage storage, + Allocator const& alloc, + cuda::stream_ref stream) + : size_{0}, + capacity_{initial_capacity}, + min_insert_size_{static_cast(1E4)}, + max_load_factor_{0.60}, + alloc_{alloc} +{ + submaps_.push_back( + std::make_unique< + cuco::static_map>( + initial_capacity, + empty_key_sentinel, + empty_value_sentinel, + pred, + probing_scheme, + scope, + storage, + alloc, + stream)); +} + +template +template +void dynamic_map::insert( + InputIt first, InputIt last, cuda::stream_ref stream) +{ + auto num_to_insert = cuco::detail::distance(first, last); + this->reserve(size_ + num_to_insert, stream); + + uint32_t submap_idx = 0; + while (num_to_insert > 0) { + auto& cur = submaps_[submap_idx]; + + auto capacity_remaining = max_load_factor_ * cur->capacity() - cur->size(); + // If we are tying to insert some of the remaining keys into this submap, we can insert + // only if we meet the minimum insert size. + if (capacity_remaining >= min_insert_size_) { + auto const n = std::min(static_cast(capacity_remaining), num_to_insert); + + std::size_t h_num_successes = cur->insert(first, first + n, stream); + + size_ += h_num_successes; + first += n; + num_to_insert -= n; + } + submap_idx++; + } +} + +template +void dynamic_map::reserve( + size_type n, cuda::stream_ref stream) +{ + size_type num_elements_remaining = n; + uint32_t submap_idx = 0; + while (num_elements_remaining > 0) { + std::size_t submap_capacity; + + // if the submap already exists + if (submap_idx < submaps_.size()) { + submap_capacity = submaps_[submap_idx]->capacity(); + } + // if the submap does not exist yet, create it + else { + empty_key empty_key_sentinel{submaps_.front()->empty_key_sentinel()}; + empty_value empty_value_sentinel{submaps_.front()->empty_value_sentinel()}; + + submap_capacity = capacity_; + submaps_.push_back(std::make_unique(submap_capacity, + empty_key_sentinel, + empty_value_sentinel, + KeyEqual{}, + ProbingScheme{}, + cuda_thread_scope{}, + Storage{}, + alloc_, + stream)); + capacity_ *= 2; + } + + num_elements_remaining -= max_load_factor_ * submap_capacity - min_insert_size_; + submap_idx++; + } +} + +template +template +void dynamic_map::contains( + InputIt first, InputIt last, OutputIt output_begin, cuda::stream_ref stream) const +{ + auto num_keys = cuco::detail::distance(first, last); + std::size_t traversed = 0; + uint32_t submap_idx = 0; + while (num_keys > 0 && submap_idx < submaps_.size()) { + const auto& cur = submaps_[submap_idx]; + const size_t cur_size = cur->size(); + const size_t num_keys_to_process = + std::min(static_cast(cur_size), num_keys); + CUCO_CUDA_TRY(cudaStreamSynchronize(stream.get())); + + cur->contains(first, first + num_keys_to_process, output_begin + traversed, stream); + + traversed += num_keys_to_process; + num_keys -= num_keys_to_process; + submap_idx++; + first += num_keys_to_process; + } +} + +} // namespace experimental +} // namespace cuco diff --git a/include/cuco/detail/static_map/static_map.inl b/include/cuco/detail/static_map/static_map.inl index ea3ff9e28..649e180d8 100644 --- a/include/cuco/detail/static_map/static_map.inl +++ b/include/cuco/detail/static_map/static_map.inl @@ -325,7 +325,7 @@ template void static_map:: insert_or_assign(InputIt first, InputIt last, cuda::stream_ref stream) { - return this->insert_or_assign_async(first, last, stream); + this->insert_or_assign_async(first, last, stream); stream.wait(); } @@ -363,7 +363,7 @@ template void static_map:: insert_or_apply(InputIt first, InputIt last, Op op, cuda::stream_ref stream) { - return this->insert_or_apply_async(first, last, op, stream); + this->insert_or_apply_async(first, last, op, stream); stream.wait(); } diff --git a/include/cuco/dynamic_map.cuh b/include/cuco/dynamic_map.cuh index c68c4ddad..4520e87b9 100644 --- a/include/cuco/dynamic_map.cuh +++ b/include/cuco/dynamic_map.cuh @@ -32,6 +32,143 @@ namespace cuco { +namespace experimental { +/** + * @brief A GPU-accelerated, unordered, associative container of key-value + * pairs with unique keys. + * + * This container automatically grows its capacity as necessary until device memory runs out. + * + * @tparam Key The type of the keys. + * @tparam T The type of the mapped values. + * @tparam Extent The type representing the extent of the container. + * @tparam Scope The thread scope for the container's operations. + * @tparam KeyEqual The equality comparison function for keys. + * @tparam ProbingScheme The probing scheme for resolving hash collisions. + * @tparam Allocator The allocator used for memory management. + * @tparam Storage The storage policy for the container. + */ +template , + cuda::thread_scope Scope = cuda::thread_scope_device, + class KeyEqual = thrust::equal_to, + class ProbingScheme = cuco::linear_probing<4, // CG size + cuco::default_hash_function>, + class Allocator = cuco::cuda_allocator>, + class Storage = cuco::storage<1>> +class dynamic_map { + using map_type = static_map; + + public: + static constexpr auto thread_scope = map_type::thread_scope; ///< CUDA thread scope + + using key_type = typename map_type::key_type; ///< Key type + using value_type = typename map_type::value_type; ///< Key-value pair type + using size_type = typename map_type::size_type; ///< Size type + using key_equal = typename map_type::key_equal; ///< Key equality comparator type + using mapped_type = T; ///< Payload type + + dynamic_map(dynamic_map const&) = delete; + dynamic_map& operator=(dynamic_map const&) = delete; + + dynamic_map(dynamic_map&&) = default; ///< Move constructor + + /** + * @brief Replaces the contents of the container with another container. + * + * @return Reference of the current map object + */ + dynamic_map& operator=(dynamic_map&&) = default; + ~dynamic_map() = default; + + /** + * @brief Constructs a dynamically-sized map with erase capability. + * + * The capacity of the map will automatically increase as the user adds key/value pairs using + * `insert`. + * + * Capacity increases by a factor of growth_factor each time the size of the map exceeds a + * threshold occupancy. The performance of `find` and `contains` gradually decreases each time the + * map's capacity grows. + * + * @param initial_capacity The initial number of slots in the map + * @param empty_key_sentinel The reserved key value for empty slots + * @param empty_value_sentinel The reserved mapped value for empty slots + * @param pred Key equality binary predicate + * @param probing_scheme Probing scheme + * @param scope The scope in which operations will be performed + * @param storage Kind of storage to use + * @param alloc Allocator used for allocating device storage + * @param stream CUDA stream used to initialize the map + */ + constexpr dynamic_map(Extent initial_capacity, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + KeyEqual const& pred = {}, + ProbingScheme const& probing_scheme = {}, + cuda_thread_scope scope = {}, + Storage storage = {}, + Allocator const& alloc = {}, + cuda::stream_ref stream = {}); + + /** + * @brief Grows the capacity of the map so there is enough space for `n` key/value pairs. + * + * If there is already enough space for `n` key/value pairs, the capacity remains the same. + * + * @param n The number of key value pairs for which there must be space + * @param stream Stream used for executing the kernels + */ + void reserve(size_type n, cuda::stream_ref stream); + + /** + * @brief Inserts all key/value pairs in the range `[first, last)`. + * + * If multiple keys in `[first, last)` compare equal, it is unspecified which + * element is inserted. + * + * @tparam InputIt Device accessible input iterator whose `value_type` is + * convertible to the map's `value_type` + * @param first Beginning of the sequence of key/value pairs + * @param last End of the sequence of key/value pairs + * @param stream Stream used for executing the kernels + */ + template + void insert(InputIt first, InputIt last, cuda::stream_ref stream = {}); + + /** + * @brief Indicates whether the keys in the range `[first, last)` are contained in the map. + * + * Writes a `bool` to `(output + i)` indicating if the key `*(first + i)` exists in the map. + * + * @tparam InputIt Device accessible input iterator + * @tparam OutputIt Device accessible output iterator whose `value_type` is + * convertible to the map's `mapped_type` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param output_begin Beginning of the sequence of booleans for the presence of each key + * @param stream Stream used for executing the kernels + */ + template + void contains(InputIt first, + InputIt last, + OutputIt output_begin, + cuda::stream_ref stream = {}) const; + + private: + size_type size_{}; ///< Number of keys in the map + size_type capacity_{}; ///< Maximum number of keys that can be inserted + + std::vector> submaps_; ///< vector of pointers to each submap + size_type min_insert_size_{}; ///< min remaining capacity of submap for insert + float max_load_factor_{}; ///< Maximum load factor + Allocator alloc_{}; ///< Allocator passed to submaps to allocate their device storage +}; + +} // namespace experimental + /** * @brief A GPU-accelerated, unordered, associative container of key-value * pairs with unique keys @@ -361,3 +498,4 @@ class dynamic_map { } // namespace cuco #include +#include diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index fd434f175..f5af5002a 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -912,6 +912,18 @@ class static_map { mapped_type empty_value_sentinel_; ///< Sentinel value that indicates an empty payload }; +namespace experimental { +template +class dynamic_map; +} + template class dynamic_map; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 639c7f3eb..8b760d18f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -93,6 +93,7 @@ ConfigureTest(STATIC_MAP_TEST # - dynamic_map tests ----------------------------------------------------------------------------- ConfigureTest(DYNAMIC_MAP_TEST dynamic_map/unique_sequence_test.cu + dynamic_map/unique_sequence_test_experimental.cu dynamic_map/erase_test.cu) ################################################################################################### diff --git a/tests/dynamic_map/unique_sequence_test_experimental.cu b/tests/dynamic_map/unique_sequence_test_experimental.cu new file mode 100644 index 000000000..d953e320c --- /dev/null +++ b/tests/dynamic_map/unique_sequence_test_experimental.cu @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +TEMPLATE_TEST_CASE_SIG("experimental::dynamic_map: unique sequence", + "", + ((typename Key, typename T), Key, T), + (int32_t, int32_t), + (int32_t, int64_t), + (int64_t, int32_t), + (int64_t, int64_t)) +{ + constexpr std::size_t num_keys{1'000'000}; + + cuco::experimental::dynamic_map map{ + 30'000'000, cuco::empty_key{-1}, cuco::empty_value{-1}}; + + thrust::device_vector d_keys(num_keys); + thrust::device_vector d_values(num_keys); + + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); + thrust::sequence(thrust::device, d_values.begin(), d_values.end()); + + auto pairs_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), + cuda::proclaim_return_type>( + [] __device__(auto i) { return cuco::pair(i, i); })); + + thrust::device_vector d_results(num_keys); + thrust::device_vector d_contained(num_keys); + + // bulk function test cases + + SECTION("All inserted keys-value pairs should be contained") + { + map.insert(pairs_begin, pairs_begin + num_keys); + map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); + + REQUIRE(cuco::test::all_of(d_contained.begin(), d_contained.end(), thrust::identity{})); + } + + SECTION("Non-inserted keys-value pairs should not be contained") + { + // segfaults + map.contains(d_keys.begin(), d_keys.end(), d_contained.begin()); + + REQUIRE(cuco::test::none_of(d_contained.begin(), d_contained.end(), thrust::identity{})); + } +}