NVIDIA · sleeepyjack · May 25, 2023 · Apr 19, 2023 · Apr 19, 2023 · Apr 19, 2023
@@ -78,3 +78,6 @@ ConfigureBench(DYNAMIC_MAP_BENCH
   hash_table/dynamic_map/find_bench.cu
   hash_table/dynamic_map/contains_bench.cu
   hash_table/dynamic_map/erase_bench.cu)
+
+ConfigureBench(HASH_BENCH
-ConfigureBench(HASH_BENCH
+###################################################################################################
+# - hash benchmarks -------------------------------------------------------------------------------
+ConfigureBench(HASH_BENCH
-ConfigureBench(HASH_BENCH
+###################################################################################################
+# - hash benchmarks -------------------------------------------------------------------------------
+ConfigureBench(HASH_BENCH
+  hash_bench.cu)
@@ -35,7 +35,9 @@ auto constexpr SKEW          = 0.5;
 auto constexpr BATCH_SIZE    = 1'000'000;
 auto constexpr INITIAL_SIZE  = 50'000'000;
 
-auto const N_RANGE             = nvbench::range(10'000'000, 100'000'000, 20'000'000);
+auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000);
+auto const N_RANGE_CACHE =
+  std::vector<nvbench::int64_t>{8'000, 80'000, 800'000, 8'000'000, 80'000'000};
 auto const OCCUPANCY_RANGE     = nvbench::range(0.1, 0.9, 0.1);
 auto const MULTIPLICITY_RANGE  = std::vector<nvbench::int64_t>{1, 2, 4, 8, 16};
 auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1);

@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/utils.hpp>
+#include <cuco/hash_functions.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+
+#include <cstdint>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/// Synthetic benchmark key made of `Words` 32-bit words.
+/// Every word is set to the seed passed to the constructor.
+template <int32_t Words>
+struct large_key {
+  constexpr __host__ __device__ large_key(int32_t seed) noexcept
+  {
+#pragma unroll Words
+    for (int32_t word = 0; word < Words; ++word) { data_[word] = seed; }
+  }
+
+ private:
+  int32_t data_[Words];  ///< Key payload, `Words * sizeof(int32_t)` bytes in total
+};
+
+// Grid-stride hashing kernel; must be launched with blockDim.x == BlockSize.
+template <int32_t BlockSize, typename Hasher, typename OutputIt>
+__global__ void hash_bench_kernel(Hasher hash,
+                                  cuco::detail::index_type n,
+                                  OutputIt out,
+                                  bool materialize_result)
+{
+  using index_type = cuco::detail::index_type;
+  // Widen before multiplying so the index arithmetic is not carried out in 32-bit unsigned
+  index_type const gid         = static_cast<index_type>(BlockSize) * blockIdx.x + threadIdx.x;
+  index_type const loop_stride = static_cast<index_type>(BlockSize) * gridDim.x;
+  typename Hasher::result_type agg = 0;
+  for (index_type idx = gid; idx < n; idx += loop_stride) {
+    typename Hasher::argument_type key(idx);
+    for (int32_t i = 0; i < 100; ++i) {  // execute hash func 100 times
+      agg += hash(key);
+    }
+  }
+  // Threads beyond the input range own no output slot, so they must not write
+  if (materialize_result && gid < n) { out[gid] = agg; }
+}
+
+/**
+ * @brief A benchmark evaluating the performance of the hash function `Hash`
+ */
+template <typename Hash>
+void hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
+{
+  using result_type = typename Hash::result_type;
+  constexpr auto threads_per_block = 128;
+  bool const store_hashes          = false;  // hashes are computed but not written out
+  auto const key_count  = state.get_int64_or_default("NumInputs", defaults::N * 10);
+  auto const num_blocks = SDIV(key_count, threads_per_block * 16);
+  // One placeholder element is enough as long as no hashes are stored
+  thrust::device_vector<result_type> hashes(store_hashes ? key_count : 1);
+
+  state.add_element_count(key_count);
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    hash_bench_kernel<threads_per_block>
+      <<<num_blocks, threads_per_block, 0, launch.get_stream()>>>(
+        Hash{}, key_count, hashes.begin(), store_hashes);
+  });
+}
+
+NVBENCH_BENCH_TYPES(
+  hash_eval,
+  NVBENCH_TYPE_AXES(nvbench::type_list<cuco::murmurhash3_32<nvbench::int32_t>,
+                                       cuco::murmurhash3_32<nvbench::int64_t>,
+                                       cuco::murmurhash3_32<large_key<32>>,  // 32*4bytes
+                                       cuco::xxhash_32<nvbench::int32_t>,
+                                       cuco::xxhash_32<nvbench::int64_t>,
+                                       cuco::xxhash_32<large_key<32>>,  // 32*4bytes
+                                       cuco::xxhash_64<nvbench::int32_t>,
+                                       cuco::xxhash_64<nvbench::int64_t>,
+                                       cuco::xxhash_64<large_key<32>>,  // 32*4bytes
+                                       cuco::murmurhash3_fmix_32<nvbench::int32_t>,
+                                       cuco::murmurhash3_fmix_64<nvbench::int64_t>>))
+  .set_name("hash_function_eval")
+  .set_type_axes_names({"Hash"})  // single type axis: the hash functor under test
+  .set_max_noise(defaults::MAX_NOISE);
@@ -73,3 +73,10 @@ NVBENCH_BENCH_TYPES(static_set_contains,
   .set_type_axes_names({"Key", "Distribution"})
   .set_max_noise(defaults::MAX_NOISE)
   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
+
+NVBENCH_BENCH_TYPES(static_set_contains,
+                    NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+                                      nvbench::type_list<distribution::unique>))
+  .set_name("static_set_contains_unique_capacity")
+  .set_type_axes_names({"Key", "Distribution"})
+  .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE);  // NOTE(review): no set_max_noise() here
@@ -75,3 +75,10 @@ NVBENCH_BENCH_TYPES(static_set_find,
   .set_type_axes_names({"Key", "Distribution"})
   .set_max_noise(defaults::MAX_NOISE)
   .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
+
+NVBENCH_BENCH_TYPES(static_set_find,
+                    NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+                                      nvbench::type_list<distribution::unique>))
+  .set_name("static_set_find_unique_capacity")
+  .set_type_axes_names({"Key", "Distribution"})
+  .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE);  // NOTE(review): no set_max_noise() here
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,8 +16,94 @@
 
 #pragma once
 
+#include <cstdint>
+
 namespace cuco::detail {
 
+/**
+ * @brief Hash functor applying the 32-bit integer finalizer (`fmix32`) of `MurmurHash3`.
+ *
+ * @note `Key` must be exactly 4 bytes in size; this is enforced at compile time.
+ *
+ * @tparam Key Type of the values being hashed
+ */
+template <typename Key>
+struct MurmurHash3_fmix32 {
+  static_assert(sizeof(Key) == 4, "Key type must be 4 bytes in size.");
+
+  using argument_type = Key;       ///< Type accepted by the call operator
+  using result_type   = uint32_t;  ///< Type of the produced hash value
+
+  /**
+   * @brief Creates the hash functor; `seed` is XOR-ed into every key before mixing.
+   *
+   * @param seed Value used to perturb the resulting hash values (defaults to 0)
+   */
+  __host__ __device__ constexpr MurmurHash3_fmix32(uint32_t seed = 0) : seed_{seed} {}
+
+  /**
+   * @brief Mixes the seeded key with alternating xor-shift and multiply steps.
+   *
+   * @param key The value to hash
+   * @return The 32-bit hash of `key`
+   */
+  constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
+  {
+    constexpr uint32_t mul1 = 0x85ebca6b;
+    constexpr uint32_t mul2 = 0xc2b2ae35;
+
+    uint32_t mixed = static_cast<uint32_t>(key) ^ seed_;
+    mixed          = (mixed ^ (mixed >> 16)) * mul1;
+    mixed          = (mixed ^ (mixed >> 13)) * mul2;
+    return mixed ^ (mixed >> 16);
+  }
+
+ private:
+  uint32_t seed_;  ///< Seed XOR-ed into the key before mixing
+};
+
+/**
+ * @brief Hash functor applying the 64-bit integer finalizer (`fmix64`) of `MurmurHash3`.
+ *
+ * @note `Key` must be exactly 8 bytes in size; this is enforced at compile time.
+ *
+ * @tparam Key Type of the values being hashed
+ */
+template <typename Key>
+struct MurmurHash3_fmix64 {
+  static_assert(sizeof(Key) == 8, "Key type must be 8 bytes in size.");
+
+  using argument_type = Key;       ///< Type accepted by the call operator
+  using result_type   = uint64_t;  ///< Type of the produced hash value
+
+  /**
+   * @brief Creates the hash functor; `seed` is XOR-ed into every key before mixing.
+   *
+   * @param seed Value used to perturb the resulting hash values (defaults to 0)
+   */
+  __host__ __device__ constexpr MurmurHash3_fmix64(uint64_t seed = 0) : seed_{seed} {}
+
+  /**
+   * @brief Mixes the seeded key with alternating xor-shift and multiply steps.
+   *
+   * @param key The value to hash
+   * @return The 64-bit hash of `key`
+   */
+  constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
+  {
+    constexpr uint64_t mul1 = 0xff51afd7ed558ccd;
+    constexpr uint64_t mul2 = 0xc4ceb9fe1a85ec53;
+
+    uint64_t mixed = static_cast<uint64_t>(key) ^ seed_;
+    mixed          = (mixed ^ (mixed >> 33)) * mul1;
+    mixed          = (mixed ^ (mixed >> 33)) * mul2;
+    return mixed ^ (mixed >> 33);
+  }
+
+ private:
+  uint64_t seed_;  ///< Seed XOR-ed into the key before mixing
+};
+
 /**
  * @brief A `MurmurHash3_32` hash function to hash the given argument on host and device.
  *
@@ -38,15 +124,12 @@ struct MurmurHash3_32 {
   using argument_type = Key;       ///< The type of the values taken as argument
   using result_type   = uint32_t;  ///< The type of the hash values produced
 
-  /// Default constructor
-  __host__ __device__ constexpr MurmurHash3_32() : MurmurHash3_32{0} {}
-
   /**
    * @brief Constructs a MurmurHash3_32 hash function with the given `seed`.
    *
    * @param seed A custom number to randomize the resulting hash value
    */
-  __host__ __device__ constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
+  __host__ __device__ constexpr MurmurHash3_32(uint32_t seed = 0) : fmix32_{0}, seed_{seed} {}
 
   /**
    * @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -60,7 +143,7 @@ struct MurmurHash3_32 {
     const uint8_t* const data = (const uint8_t*)&key;
     constexpr int nblocks     = len / 4;
 
-    uint32_t h1           = m_seed;
+    uint32_t h1           = seed_;
     constexpr uint32_t c1 = 0xcc9e2d51;
     constexpr uint32_t c2 = 0x1b873593;
     //----------
@@ -92,7 +175,7 @@ struct MurmurHash3_32 {
     //----------
     // finalization
     h1 ^= len;
-    h1 = fmix32(h1);
+    h1 = fmix32_(h1);
     return h1;
   }
 
@@ -102,16 +185,8 @@ struct MurmurHash3_32 {
     return (x << r) | (x >> (32 - r));
   }
 
-  constexpr __host__ __device__ uint32_t fmix32(uint32_t h) const noexcept
-  {
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-    return h;
-  }
-  uint32_t m_seed;
+  MurmurHash3_fmix32<uint32_t> fmix32_;
+  uint32_t seed_;
 };
 
-}  // namespace cuco::detail
+}  //  namespace cuco::detail