Skip to content

Commit

Permalink
Add map/set ctor overloads taking load factor
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel committed Sep 26, 2023
1 parent 2c863f9 commit 30788cb
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 13 deletions.
23 changes: 12 additions & 11 deletions include/cuco/detail/open_addressing_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,13 @@ class open_addressing_impl {
}

/**
* @brief Constructs a statically-sized open addressing data structure with the specified initial
* capacity, sentinel values and CUDA stream.
* @brief Constructs a statically-sized open addressing data structure with the number of elements
* to insert, sentinel values, the desired load factor and CUDA stream
*
* @note The actual capacity depends on the given `capacity`, the probing scheme, CG size, and the
* window size and it is computed via the `make_window_extent` factory. Insert operations will not
* automatically grow the container. Attempting to insert more unique keys than the capacity of
* the container results in undefined behavior.
* @note The actual capacity depends on the given `n`, the probing scheme, CG size, the desired
* load factor and the window size and it is computed via the `make_window_extent` factory. Insert
* operations will not automatically grow the container. Attempting to insert more unique keys
* than the capacity of the container results in undefined behavior.
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the
Expand All @@ -152,19 +152,20 @@ class open_addressing_impl {
* @throw If the desired occupancy is no bigger than zero
* @throw If the desired occupancy is larger than one
*
* @param capacity The requested lower-bound size
* @param n The number of elements to insert
* @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
* load factor
* @param empty_key_sentinel The reserved key value for empty slots
* @param empty_slot_sentinel The reserved slot value for empty slots
* @param desired_load_factor The desired load factor of the container
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the data structure
*/
constexpr open_addressing_impl(Extent capacity,
constexpr open_addressing_impl(Extent n,
double desired_load_factor,
Key empty_key_sentinel,
Value empty_slot_sentinel,
double desired_load_factor,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
Expand All @@ -174,7 +175,7 @@ class open_addressing_impl {
predicate_{pred},
probing_scheme_{probing_scheme},
storage_{make_window_extent<open_addressing_impl>(
static_cast<size_type>(static_cast<double>(capacity) / desired_load_factor)),
static_cast<size_type>(static_cast<double>(n) / desired_load_factor)),
alloc}
{
CUCO_EXPECTS(desired_load_factor > 0., "Desired occupancy must be larger than zero");
Expand Down
29 changes: 29 additions & 0 deletions include/cuco/detail/static_map/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,35 @@ constexpr static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
{
}

// Constructor overload taking the number of elements to insert (`n`) and a desired load factor.
// This definition performs no sizing itself: `n` and `desired_load_factor` are forwarded as-is to
// the `impl_type` constructor, and the empty value sentinel is additionally kept in
// `empty_value_sentinel_`.
template <class Key,
class T,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
constexpr static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
static_map(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
empty_value<T> empty_value_sentinel,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
cuda_stream_ref stream)
: impl_{std::make_unique<impl_type>(n,
desired_load_factor,
empty_key_sentinel,
// The implementation takes a key sentinel and a slot sentinel; for a map the
// slot sentinel is the pair of the empty key and the empty value.
cuco::pair{empty_key_sentinel, empty_value_sentinel},
pred,
probing_scheme,
alloc,
stream)},
empty_value_sentinel_{empty_value_sentinel}
{
}

template <class Key,
class T,
class Extent,
Expand Down
26 changes: 26 additions & 0 deletions include/cuco/detail/static_set/static_set.inl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@ constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Sto
{
}

// Constructor overload taking the number of elements to insert (`n`) and a desired load factor.
// This definition performs no sizing itself: `n` and `desired_load_factor` are forwarded as-is to
// the `impl_type` constructor.
template <class Key,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::static_set(
Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
cuda_stream_ref stream)
: impl_{std::make_unique<impl_type>(n,
desired_load_factor,
// The key sentinel is passed twice: once as the empty key sentinel and once
// as the empty slot sentinel (presumably because a set slot holds only a key).
empty_key_sentinel,
empty_key_sentinel,
pred,
probing_scheme,
alloc,
stream)}
{
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
Expand Down
35 changes: 34 additions & 1 deletion include/cuco/static_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class static_map {

/**
* @brief Constructs a statically-sized map with the specified initial capacity, sentinel values
* and CUDA stream.
* and CUDA stream
*
* The actual map capacity depends on the given `capacity`, the probing scheme, CG size, and the
* window size and it is computed via the `make_window_extent` factory. Insert operations will not
Expand Down Expand Up @@ -184,6 +184,39 @@ class static_map {
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Constructs a statically-sized map with the number of elements to insert, sentinel
* values, the desired load factor, and CUDA stream
*
* The actual map capacity depends on the given `n`, the probing scheme, CG size, the desired load
* factor and the window size and it is computed via the `make_window_extent` factory. Insert
* operations will not automatically grow the map. Attempting to insert more unique keys than the
* capacity of the map results in undefined behavior.
*
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the
* stream before the object is first used.
*
* @throw If the desired load factor is no bigger than zero
* @throw If the desired load factor is larger than one
*
* @param n The number of elements to insert
* @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
* load factor
* @param empty_key_sentinel The reserved key value for empty slots
* @param empty_value_sentinel The reserved mapped value for empty slots
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the map
*/
constexpr static_map(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
empty_value<T> empty_value_sentinel,
KeyEqual const& pred = {},
ProbingScheme const& probing_scheme = {},
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Erases all elements from the container. After this call, `size()` returns zero.
* Invalidates any references, pointers, or iterators referring to contained elements.
Expand Down
33 changes: 32 additions & 1 deletion include/cuco/static_set.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class static_set {

/**
* @brief Constructs a statically-sized set with the specified initial capacity, sentinel values
* and CUDA stream.
* and CUDA stream
*
* The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the
* window size and it is computed via the `make_window_extent` factory. Insert operations will not
Expand All @@ -157,6 +157,37 @@ class static_set {
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Constructs a statically-sized set with the number of elements to insert, sentinel
* values, the desired load factor, and CUDA stream
*
* The actual set capacity depends on the given `n`, the probing scheme, CG size, the desired load
* factor and the window size and it is computed via the `make_window_extent` factory. Insert
* operations will not automatically grow the set. Attempting to insert more unique keys than the
* capacity of the set results in undefined behavior.
*
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the
* stream before the object is first used.
*
* @throw If the desired load factor is no bigger than zero
* @throw If the desired load factor is larger than one
*
* @param n The number of elements to insert
* @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
* load factor
* @param empty_key_sentinel The reserved key value for empty slots
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the set
*/
constexpr static_set(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
KeyEqual const& pred = {},
ProbingScheme const& probing_scheme = {},
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Erases all elements from the container. After this call, `size()` returns zero.
* Invalidates any references, pointers, or iterators referring to contained elements.
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ ConfigureTest(STATIC_SET_TEST
###################################################################################################
# - static_map tests ------------------------------------------------------------------------------
ConfigureTest(STATIC_MAP_TEST
static_map/capacity_test.cu
static_map/custom_type_test.cu
static_map/duplicate_keys_test.cu
static_map/erase_test.cu
Expand Down
162 changes: 162 additions & 0 deletions tests/static_map/capacity_test.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cuco/static_map.cuh>

#include <catch2/catch_test_macros.hpp>

// Checks the capacity reported by `static_map::capacity()` and by an insert ref's `capacity()`
// for several ways of specifying the requested size: an empty static extent, a negative dynamic
// extent, a dynamic extent, a plain integer, a plain integer plus load factor, and a dynamic
// extent combined with a CG-size-2 linear probing scheme.
//
// Each section name is unique so that a section can be identified unambiguously in test reports
// and selected with Catch2's section filter.
TEST_CASE("Static map capacity", "")
{
  using Key        = int32_t;
  using T          = int32_t;
  using ProbeT     = cuco::experimental::double_hashing<1, cuco::default_hash_function<Key>>;
  using Equal      = thrust::equal_to<Key>;
  using AllocatorT = cuco::cuda_allocator<std::byte>;
  using StorageT   = cuco::experimental::storage<2>;

  SECTION("zero capacity is allowed.")
  {
    // Expected capacity for an empty request with this probing scheme / storage configuration.
    auto constexpr gold_capacity = 4;

    using extent_type = cuco::experimental::extent<std::size_t, 0>;
    cuco::experimental::static_map<Key,
                                   T,
                                   extent_type,
                                   cuda::thread_scope_device,
                                   Equal,
                                   ProbeT,
                                   AllocatorT,
                                   StorageT>
      map{extent_type{}, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};
    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }

  SECTION("negative capacity (ikr -_-||) is also allowed.")
  {
    // A negative request is expected to yield the same capacity as the empty request above.
    auto constexpr gold_capacity = 4;

    using extent_type = cuco::experimental::extent<int32_t>;
    cuco::experimental::static_map<Key,
                                   T,
                                   extent_type,
                                   cuda::thread_scope_device,
                                   Equal,
                                   ProbeT,
                                   AllocatorT,
                                   StorageT>
      map{extent_type{-10}, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};
    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }

  // Requested size shared by all remaining sections.
  constexpr std::size_t num_keys{400};

  SECTION("Dynamic extent is evaluated at run time.")
  {
    auto constexpr gold_capacity = 422;  // 211 x 2

    using extent_type = cuco::experimental::extent<std::size_t>;
    cuco::experimental::static_map<Key,
                                   T,
                                   extent_type,
                                   cuda::thread_scope_device,
                                   Equal,
                                   ProbeT,
                                   AllocatorT,
                                   StorageT>
      map{num_keys, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};
    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }

  SECTION("map can be constructed from plain integer.")
  {
    auto constexpr gold_capacity = 422;  // 211 x 2

    cuco::experimental::static_map<Key,
                                   T,
                                   std::size_t,
                                   cuda::thread_scope_device,
                                   Equal,
                                   ProbeT,
                                   AllocatorT,
                                   StorageT>
      map{num_keys, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};
    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }

  SECTION("map can be constructed from plain integer and load factor.")
  {
    // 400 keys at a 0.8 load factor request 500 slots; the expected capacity is 502.
    auto constexpr gold_capacity = 502;  // 251 x 2

    cuco::experimental::static_map<Key,
                                   T,
                                   std::size_t,
                                   cuda::thread_scope_device,
                                   Equal,
                                   ProbeT,
                                   AllocatorT,
                                   StorageT>
      map{num_keys, 0.8, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};
    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }

  // Renamed: this section previously had the same name as the dynamic-extent section above.
  SECTION("Dynamic extent with CG size 2 linear probing is evaluated at run time.")
  {
    auto constexpr gold_capacity = 412;  // 103 x 2 x 2

    using probe = cuco::experimental::linear_probing<2, cuco::default_hash_function<Key>>;
    auto map    = cuco::experimental::static_map<Key,
                                              T,
                                              cuco::experimental::extent<std::size_t>,
                                              cuda::thread_scope_device,
                                              Equal,
                                              probe,
                                              AllocatorT,
                                              StorageT>{
      num_keys, cuco::empty_key<Key>{-1}, cuco::empty_value<T>{-1}};

    auto const capacity = map.capacity();
    REQUIRE(capacity == gold_capacity);

    auto ref                = map.ref(cuco::experimental::insert);
    auto const ref_capacity = ref.capacity();
    REQUIRE(ref_capacity == gold_capacity);
  }
}
Loading

0 comments on commit 30788cb

Please sign in to comment.