NVIDIA · PointKernel · May 19, 2022 · Mar 2, 2022 · Mar 2, 2022 · Mar 2, 2022
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,14 @@
  * limitations under the License.
  */
 
-#include <benchmark/benchmark.h>
+#include <synchronization.hpp>
+
 #include <cuco/dynamic_map.cuh>
+
+#include <benchmark/benchmark.h>
+
 #include <iostream>
 #include <random>
-#include <synchronization.hpp>
 
 enum class dist_type { UNIQUE, UNIFORM, GAUSSIAN };
 

diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,16 @@
  * limitations under the License.
  */
 
-#include "cuco/static_map.cuh"
+#include <cuco/static_map.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/for_each.h>
+
 #include <benchmark/benchmark.h>
+
 #include <fstream>
 #include <iostream>
 #include <random>
-#include <thrust/device_vector.h>
-#include <thrust/for_each.h>
 
 enum class dist_type { UNIQUE, UNIFORM, GAUSSIAN };
 
@@ -145,6 +148,9 @@ static void BM_static_map_search_all(::benchmark::State& state)
 
   for (auto _ : state) {
     map.find(d_keys.begin(), d_keys.end(), d_results.begin());
+    // TODO: get rid of sync and rewrite the benchmark with `nvbench`
+    // once https://github.com/NVIDIA/nvbench/pull/80 is merged
+    cudaDeviceSynchronize();
   }
 
   state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
@@ -202,11 +208,55 @@ BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE)
   ->Apply(generate_size_and_occupancy)
   ->UseManualTime();
 
-BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE)
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE)
   ->Unit(benchmark::kMillisecond)
   ->Apply(generate_size_and_occupancy);
 
-BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE)
+BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIFORM)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy)
+  ->UseManualTime();
+
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIFORM)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
+
+BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::GAUSSIAN)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy)
+  ->UseManualTime();
+
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::GAUSSIAN)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
+
+BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE)
   ->Unit(benchmark::kMillisecond)
   ->Apply(generate_size_and_occupancy)
-  ->UseManualTime();
+  ->UseManualTime();
+
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
+
+BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIFORM)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy)
+  ->UseManualTime();
+
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIFORM)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
+
+BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::GAUSSIAN)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy)
+  ->UseManualTime();
+
+BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::GAUSSIAN)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
+
+BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE)
+  ->Unit(benchmark::kMillisecond)
+  ->Apply(generate_size_and_occupancy);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <random>
+#include <key_generator.hpp>
+
+#include <cuco/static_multimap.cuh>
 
 #include <nvbench/nvbench.cuh>
-#include <thrust/device_vector.h>
 
-#include <cuco/static_multimap.cuh>
-#include <key_generator.hpp>
+#include <thrust/device_vector.h>
 
 /**
  * @brief A benchmark evaluating multi-value `count` performance:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <random>
+#include <key_generator.hpp>
+
+#include <cuco/static_multimap.cuh>
 
 #include <nvbench/nvbench.cuh>
-#include <thrust/device_vector.h>
 
-#include <cuco/static_multimap.cuh>
-#include <key_generator.hpp>
+#include <thrust/device_vector.h>
 
 /**
  * @brief A benchmark evaluating multi-value `insert` performance:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,12 @@
  * limitations under the License.
  */
 
+#include <cuco/static_multimap.cuh>
+
 #include <nvbench/nvbench.cuh>
 
-#include <random>
 #include <thrust/device_vector.h>
 
-#include "cuco/static_multimap.cuh"
-
 /**
  * @brief Generates input keys by a given number of repetitions per key.
  *

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include <random>
+#include <key_generator.hpp>
+
+#include <cuco/static_multimap.cuh>
 
 #include <nvbench/nvbench.cuh>
+
 #include <thrust/device_vector.h>
 #include <thrust/iterator/discard_iterator.h>
 
-#include <cuco/static_multimap.cuh>
-#include <key_generator.hpp>
-
 namespace {
 // Custom pair equal
 template <typename Key, typename Value>

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <random>
+#include <key_generator.hpp>
+
+#include <cuco/static_multimap.cuh>
 
 #include <nvbench/nvbench.cuh>
-#include <thrust/device_vector.h>
 
-#include <cuco/static_multimap.cuh>
-#include <key_generator.hpp>
+#include <thrust/device_vector.h>
 
 /**
  * @brief A benchmark evaluating multi-value query (`count` + `retrieve`) performance:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include <cuco/static_multimap.cuh>
 #include <key_generator.hpp>
 
-#include <thrust/device_vector.h>
+#include <cuco/static_multimap.cuh>
 
 #include <nvbench/nvbench.cuh>
 
-#include <random>
+#include <thrust/device_vector.h>
 
 /**
  * @brief A benchmark evaluating multi-value `retrieve` performance:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 
 // Google Benchmark library
 #include <benchmark/benchmark.h>
+
 #include <cuda_runtime_api.h>
 
 #define BENCH_CUDA_TRY(call)                                                         \
@@ -129,4 +130,4 @@ class cuda_event_timer {
   cudaEvent_t stop_;
   cudaStream_t stream_;
   benchmark::State* p_state;
-};
+};
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
+#include <cuco/static_map.cuh>
+
 #include <thrust/device_vector.h>
 #include <thrust/logical.h>
 #include <thrust/transform.h>
 
-#include <cuco/static_map.cuh>
-
 // User-defined key type
 #ifdef CUCO_NO_INDEPENDENT_THREADS
 struct custom_key_type {

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <limits>
+#include <cuco/static_map.cuh>
 
 #include <thrust/device_vector.h>
 #include <thrust/sequence.h>
 #include <thrust/transform.h>
 
-#include <cuco/static_map.cuh>
+#include <limits>
 
 int main(void)
 {

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <limits>
+#include <cuco/static_multimap.cuh>
 
 #include <thrust/device_vector.h>
 #include <thrust/sequence.h>
 #include <thrust/transform.h>
 
-#include <cuco/static_multimap.cuh>
+#include <limits>
 
 int main(void)
 {

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,12 @@
  * limitations under the License.
  */
 
+#include <cub/cub.cuh>
+
+#include <cuda/std/atomic>
+
+#include <cooperative_groups.h>
+
 namespace cuco {
 namespace detail {
 namespace cg = cooperative_groups;
@@ -457,4 +463,4 @@ __global__ void contains(InputIt first,
   }
 }
 }  // namespace detail
-}  // namespace cuco
+}  // namespace cuco
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cuda_runtime_api.h>
+
 #include <stdexcept>
 #include <string>