NVIDIA · sleeepyjack · Oct 30, 2024 · Oct 25, 2024 · Oct 25, 2024 · Oct 25, 2024
@@ -254,4 +254,7 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
 `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries.
 
 #### Examples:
-- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVm1v20YM_iuE9mF2I78FCwo4L4CXpJuxwsnstEUxD8HpRFuHynfavdgxgvz38U6SLSduMSwBEptHPnz4kEfpOTJojFDSRMO_niORRsNBHOVMLh1bYjSMuEtZFEdGOc399967uYR3cK2KrRbLzEKLt-G0f_pLDJPP45vxCK7vpvd309HD-G7S9b7B_6PgKA2m4GSKGmyGMCoYp3_VSQyfUXsicNrtQ8s7zKPqbB61zwPKVjlYsS1IZcEZJBhhYCFyBHziWFgQErhaFblgkiNshM1Cqgon0IGvFYhKLCN_RhEFfVs0PYHZHXX_k1lbDHu9zWbTZYF2V-llLy-dTe_j-Pp2MrvtEPVd2CeZk7Kg8R8nNBWebIEVxIyzhPjmbANKA1tqpDOrPPONFlbIZQxGLeyGaQw4qTBWi8TZA_FqnlR_04HkY5KEG81gPJtH8OtoNp7FAefL-OH3u08P8GU0nY4mD-PbGdxNqVmTm7FvFX37AKPJV_hjPLmJAUk6SoVPhfZVEFXhZcW01HCGeEBjoUpapkAuFoJDPUGwVGvUksqCAvVKlLNGJNOAk4uVsMwG25viQqreXM7lT0Ly3KUIF9xx1UtypVaP1HeLustddnXoYzPtjO1x5aTt-sM3RymuKcXjGrlV-rgLPiF3nthjoahp2-NehrqLNGrd1xyEoqYgWwWzkJYmTsjWWom0PZfPVBj0evAbStTMIgz6P_f7ffiG2yANDYNBbYMWC6GNhbNwTjiqMobSPYzH5iSf9a0C6VaPAeWywjw_7mMLGurLvfs76HfPvucrD307FcK5Ly3UMSvYxl-kklZ57U6Jr3FJp7SZwNU3bzhsdu-C0l1Vcc8U8lKBlvIOhweNKp09h1ZNJuyFnXfdjZY_6ia4JMnbcYjookz950G7ysCc17J4DF5UXjPkvOlAgaH-ne_Jvv7aS-5gyoCDoxpgT2Ov3Ph1ozOWL-pdFAK8Y9VulqatmkVcZWr_ULBEqfzKe9IldrltlcRjWLDc4KF2RwNlM_Cg-Q2MqpQ_HeptYzp3O4E2yFooZ_JtNde0p3alUeCDX-ImUy5PoUwHYS1b7bBTKENLcY0Q7gkJ83A_vRw0VaFh9XvcvJEm3hdeN_a_0A1l2oxZoA0cnjR-BaIMrF5dPj_7wj9pwkxXJyYEFlolLKfVTMswZZYB7QPHrSOsuAFToeBTJhJhjb9EXtdXdX-4n171IcWCyvKrVJVMqBUJMSdVQqyoa2tU5bfJoWvGTIbGPz5Tv5J9vcfllDs5ZSWn_I6ci1yRXF5tT_bSmypjqx6vsIxbh8N2pEFNU3Vj_Ri029CrAMvxK2e3zLv4H3lfV9I0_SCvrCs2Ng3YFi4u6HHrh5KetvS5FiGYfd8q-2JvD7GUJK-wNNJUSPCr-oXetPz7C73R6P0LWSTXnA9Oz9yAjlVhy7e1qENAl_zkZPAeOkzz7NKsHt_3odOhzW3pj6UcmHZytkrCK1wukgYm5zwn47p86SID1Su_RS9xfU67-uCctIte_g6__wINDYAL))
+- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw))
+
+#### Examples:
+- [Host-bulk APIs (Arrow fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_arrow_policy_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydV22PGjcQ_iuj7YdCbnk7NYq03J3EXZIWNeJSuCSKSoWMdwAri721vXD0dP-9Y68XloNEVeEDu_a8PPPMi81TZNAYoaSJkj-fIpFGSS-OMiaXBVtilES8SFkUR0YVmrv3zquphFdwp_KdFsuVhQZvwmX38pcYRp-Hb4cDuLsff7wfDx6G96O2k_XyHwRHaTCFQqaowa4QBjnj9BN2YviM2gGBy3YXGk5gGoW9adTseys7VcCa7UAqC4VBMiMMLESGgI8ccwtCAlfrPBNMcoStsCvvKtjxcOBrMKLmlpE8I42c3hZ1SWB2D919VtbmSaez3W7bzMNuK73sZKWw6XwY3r0bTd61CPpe7ZPMiFnQ-HchNAU-3wHLCRlnc8KbsS0oDWypkfascsi3WlghlzEYtbBbptHbSYWxWswLe0RehZPirwsQfUwScYMJDCfTCG4Hk-Ek9na-DB9-u__0AF8G4_Fg9DB8N4H7MSVr9HboUkVv72Ew-gq_D0dvY0CijlzhY65dFARVOFoxLTmcIB7BWKgSlsmRi4XgUFUQLNUGtaSwIEe9FmWtEcjU28nEWlhm_dpJcN5VZyqn8icheVakCFe84Kozz5RazyjvFnWbF6ubYxm70oWxHa4Kadtu82QrxQ25mG2QW6XPi-Aj8sIBm-WKkrY7L2Uou0il1n6JQShKCrK1XxbSUsUJ2dgokTan8okCA7fIKW7rOAZZrGffcGdcsV1Dr_tzt9vtw_7T6XSu4FeUqJnFsA1O_rwlm5dq1we7r6Dbft0PlobErrae64XQxsKKZQtvzxlTYcPT-x0H8sRBK3jun2qYYh6yZUjj0kfmcdCj22xVm1Qi4JMbvDvynKiDBnaXl4V2LAE0CVx5kcjMi1w77_296hc3B7IszAsaO1qrbdCGMrcnVulHLlHn2sWxLGmnUjg4K_Uqf64ok4Q5wyHOUDRXFaibfkBzW_fi1f2QOgPq4CyYLJ1NPfOlx3of7F3FYRMfLUp7ZcQ_OLM3bjVlSUJ1iyydGZp5OCvbIK5Hc3MgbpKzrRuPR8R4tD7Sijtq5Zd59EVTR10-P9Xq4Dn4MTZNEupUC1dXNLdui-wbpc9Xp6_EU-cVVecSFDAlZGkqofYh494TyjQLnssOTpKjWXBImO-GRlXd_vzZq1Rd33Bb7TkuqbWbsddokwf33GsGN6xw_ZTPvBTVSl2lXxcgRd9Qe9mLekOVUnJvplQ42qoMHGAccnm-4cOZ5xUOKWuzNG1UKOLgqflD1uZKZTdOkg6LIrONEngMC5YZPOburKKsKx5Nk5qNEMofBepdbULtzx46qTZCFSbbhQqi83AfGik-uMuCWakiS6F0B_74t7rAVq4MHb4bBD9giZiHj-PrXp0VGmfuvmBOqIkPgVeJ_S9wfZh2xSy1E_objTtqqWUdqhcD2HWjcDeaejsYr5hrNWcZXQHo0E2ZZVTnuuC2IFtxzUywgo8rMReWVEteX8T9_uP4pgsp5hSWmzyqREKpmBNyYsXriiq2WlRuChyLrphZoXFjN_VzneI9T6fc0ykDnfI7dC4yRXQ5th3YMAr9YqMqL3_oN46L7UyC6kuhY10ZNJvQCQbL8itrt_S7-B9-X0ZSX_qBX9k8Px5dUdKtjp4rEvyyy1tYXxzWXw48jVQVErr0-jyVdKd3N2W6O-vD1T-SG857l6-LHm2r3Jb_C6IWmbrmFxe9N9Bimq-uzXr2pgutFp3utuUPmhTTVsbWc_9nIRPzmk3OeUaLm_J6TwsUsfwWPcfVPh1XR
_vEXvT8l__-C3CDWAg=))
@@ -41,9 +41,9 @@ template <typename Key, typename Hash, typename Word, nvbench::int32_t WordsPerB
 void bloom_filter_add(nvbench::state& state,
                       nvbench::type_list<Key, Hash, Word, nvbench::enum_type<WordsPerBlock>, Dist>)
 {
-  using policy_type = cuco::bloom_filter_policy<rebind_hasher_t<Hash, Key>,
-                                                Word,
-                                                static_cast<std::uint32_t>(WordsPerBlock)>;
+  using policy_type = cuco::default_filter_policy<rebind_hasher_t<Hash, Key>,
+                                                  Word,
+                                                  static_cast<std::uint32_t>(WordsPerBlock)>;
   using filter_type =
     cuco::bloom_filter<Key, cuco::extent<size_t>, cuda::thread_scope_device, policy_type>;
 
@@ -83,6 +83,51 @@ void bloom_filter_add(nvbench::state& state,
   });
 }
 
+/**
+ * @brief A benchmark evaluating `cuco::bloom_filter::add_async` performance with
+ * `arrow_filter_policy`
+ *
+ * Reads the `NumInputs` and `FilterSizeMB` axes from `state`, derives the number of filter blocks
+ * from the requested filter size and measures `add_async` over the generated keys. Configurations
+ * requesting more than `policy_type::max_filter_blocks` blocks are skipped.
+ *
+ * @tparam Key Key type inserted into the filter
+ * @tparam Dist Key distribution tag forwarded to `dist_from_state`
+ *
+ * @param state nvbench state that supplies the axis values and receives the measurements
+ */
+template <typename Key, typename Dist>
+void arrow_bloom_filter_add(nvbench::state& state, nvbench::type_list<Key, Dist>)
+{
+  using policy_type = cuco::arrow_filter_policy<Key>;
+  using filter_type =
+    cuco::bloom_filter<Key, cuco::extent<size_t>, cuda::thread_scope_device, policy_type>;
+
+  auto const num_keys       = state.get_int64("NumInputs");
+  auto const filter_size_mb = state.get_int64("FilterSizeMB");
+
+  // Requested size in bytes divided by the size of one block (word size * words per block)
+  std::size_t const num_sub_filters =
+    (filter_size_mb * 1024 * 1024) /
+    (sizeof(typename filter_type::word_type) * filter_type::words_per_block);
+
+  if (num_sub_filters > policy_type::max_filter_blocks) {
+    state.skip("bloom filter with arrow policy should have <= 4194304 blocks");
+    // `state.skip` does not leave this function by itself; return explicitly so the invalid
+    // configuration is neither allocated nor measured
+    return;
+  }
+
+  thrust::device_vector<Key> keys(num_keys);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  state.add_element_count(num_keys);
+
+  filter_type filter{num_sub_filters};
+
+  state.collect_dram_throughput();
+  state.collect_l1_hit_rates();
+  state.collect_l2_hit_rates();
+  state.collect_loads_efficiency();
+  state.collect_stores_efficiency();
+
+  add_fpr_summary(state, filter);
+
+  // Enqueue the insertion on the stream provided by nvbench for this launch
+  state.exec([&](nvbench::launch& launch) {
+    filter.add_async(keys.begin(), keys.end(), {launch.get_stream()});
+  });
+}
+
 NVBENCH_BENCH_TYPES(bloom_filter_add,
                     NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
                                       nvbench::type_list<defaults::BF_HASH>,
@@ -118,3 +163,12 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
   .set_max_noise(defaults::MAX_NOISE)
   .add_int64_axis("NumInputs", {defaults::BF_N})
   .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
+
+NVBENCH_BENCH_TYPES(arrow_bloom_filter_add,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
+                                      nvbench::type_list<distribution::unique>))
+  .set_name("arrow_bloom_filter_add_unique_size")
+  .set_type_axes_names({"Key", "Distribution"})  // one name per type axis listed above
+  .set_max_noise(defaults::MAX_NOISE)
+  .add_int64_axis("NumInputs", {defaults::BF_N})  // read by the benchmark body
+  .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);  // read by the benchmark body
@@ -43,9 +43,9 @@ void bloom_filter_contains(
 {
   // cudaDeviceSetLimit(cudaLimitMaxL2FetchGranularity, 32); // slightly improves peformance if
   // filter block fits into a 32B sector
-  using policy_type = cuco::bloom_filter_policy<rebind_hasher_t<Hash, Key>,
-                                                Word,
-                                                static_cast<std::uint32_t>(WordsPerBlock)>;
+  using policy_type = cuco::default_filter_policy<rebind_hasher_t<Hash, Key>,
+                                                  Word,
+                                                  static_cast<std::uint32_t>(WordsPerBlock)>;
   using filter_type =
     cuco::bloom_filter<Key, cuco::extent<size_t>, cuda::thread_scope_device, policy_type>;
 
@@ -88,6 +88,56 @@ void bloom_filter_contains(
   });
 }
 
+/**
+ * @brief A benchmark evaluating `cuco::bloom_filter::contains_async` performance with
+ * `arrow_filter_policy`
+ *
+ * Reads the `NumInputs` and `FilterSizeMB` axes from `state`, derives the number of filter blocks
+ * from the requested filter size, inserts all generated keys and then measures `contains_async`
+ * over the same keys. Configurations requesting more than `policy_type::max_filter_blocks`
+ * blocks are skipped.
+ *
+ * @tparam Key Key type stored in and queried from the filter
+ * @tparam Dist Key distribution tag forwarded to `dist_from_state`
+ *
+ * @param state nvbench state that supplies the axis values and receives the measurements
+ */
+template <typename Key, typename Dist>
+void arrow_bloom_filter_contains(nvbench::state& state, nvbench::type_list<Key, Dist>)
+{
+  // cudaDeviceSetLimit(cudaLimitMaxL2FetchGranularity, 32); // slightly improves performance if
+  // filter block fits into a 32B sector
+  using policy_type = cuco::arrow_filter_policy<Key>;
+  using filter_type =
+    cuco::bloom_filter<Key, cuco::extent<size_t>, cuda::thread_scope_device, policy_type>;
+
+  auto const num_keys       = state.get_int64("NumInputs");
+  auto const filter_size_mb = state.get_int64("FilterSizeMB");
+
+  // Requested size in bytes divided by the size of one block (word size * words per block)
+  std::size_t const num_sub_filters =
+    (filter_size_mb * 1024 * 1024) /
+    (sizeof(typename filter_type::word_type) * filter_type::words_per_block);
+
+  if (num_sub_filters > policy_type::max_filter_blocks) {
+    state.skip("bloom filter with arrow policy should have <= 4194304 blocks");
+    // `state.skip` does not leave this function by itself; return explicitly so the invalid
+    // configuration is neither allocated nor measured
+    return;
+  }
+
+  thrust::device_vector<Key> keys(num_keys);
+  thrust::device_vector<bool> result(num_keys, false);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  state.add_element_count(num_keys);
+
+  filter_type filter{num_sub_filters};
+
+  state.collect_dram_throughput();
+  state.collect_l1_hit_rates();
+  state.collect_l2_hit_rates();
+  state.collect_loads_efficiency();
+  state.collect_stores_efficiency();
+
+  add_fpr_summary(state, filter);
+
+  // Populate the filter before `state.exec` so that only the lookups are inside the measured call
+  filter.add(keys.begin(), keys.end());
+
+  state.exec([&](nvbench::launch& launch) {
+    filter.contains_async(keys.begin(), keys.end(), result.begin(), {launch.get_stream()});
+  });
+}
+
 NVBENCH_BENCH_TYPES(bloom_filter_contains,
                     NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
                                       nvbench::type_list<defaults::BF_HASH>,
@@ -122,4 +172,13 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
   .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
   .set_max_noise(defaults::MAX_NOISE)
   .add_int64_axis("NumInputs", {defaults::BF_N})
-  .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
+  .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
+
+NVBENCH_BENCH_TYPES(arrow_bloom_filter_contains,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
+                                      nvbench::type_list<distribution::unique>))
+  .set_name("arrow_bloom_filter_contains_unique_size")
+  .set_type_axes_names({"Key", "Distribution"})  // one name per type axis listed above
+  .set_max_noise(defaults::MAX_NOISE)
+  .add_int64_axis("NumInputs", {defaults::BF_N})  // read by the benchmark body
+  .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);  // read by the benchmark body
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -47,3 +47,4 @@ ConfigureExample(STATIC_MULTIMAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/
 ConfigureExample(HYPERLOGLOG_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/hyperloglog/host_bulk_example.cu")
 ConfigureExample(HYPERLOGLOG_DEVICE_REF_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/hyperloglog/device_ref_example.cu")
 ConfigureExample(BLOOM_FILTER_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/host_bulk_example.cu")
+ConfigureExample(BLOOM_FILTER_ARROW_POLICY_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/host_bulk_arrow_policy_example.cu")
diff --git a/examples/bloom_filter/host_bulk_arrow_policy_example.cu b/examples/bloom_filter/host_bulk_arrow_policy_example.cu
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuco/bloom_filter.cuh>
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/sequence.h>
+
+#include <iostream>
+
+/**
+ * @brief Demonstrates bulk insertion and lookup on a `cuco::bloom_filter` that uses
+ * `cuco::arrow_filter_policy` for fingerprint generation.
+ *
+ * Inserts the first half of a sequence of keys, queries both halves and prints the observed
+ * true-positive and false-positive rates.
+ *
+ * @return 0 on completion
+ */
+int main(void)
+{
+  int constexpr num_keys    = 10'000;        ///< Generate 10'000 keys
+  int constexpr num_tp      = num_keys / 2;  ///< Insert the first half keys into the filter.
+  int constexpr num_tn      = num_keys - num_tp;  ///< Second half is queried but never inserted
+  int constexpr sub_filters = 200;                ///< 200 sub-filters per bloom filter
+
+  // key type for bloom filter
+  using key_type = int;
+
+  // We will use the Arrow filter policy for bloom filter fingerprint generation
+  using policy_type = cuco::arrow_filter_policy<key_type>;
+  // Bloom filter type with Arrow filter policy
+  using filter_type =
+    cuco::bloom_filter<key_type, cuco::extent<size_t>, cuda::thread_scope_device, policy_type>;
+
+  // Spawn a bloom filter with arrow policy and 200 sub-filters.
+  filter_type filter{sub_filters};
+
+  std::cout << "Bulk insert into bloom filter with Arrow fingerprint generation policy: "
+            << std::endl;
+
+  // Keys are the consecutive integers 1, 2, ..., num_keys
+  thrust::device_vector<key_type> keys(num_keys);
+  thrust::sequence(keys.begin(), keys.end(), 1);
+
+  auto tp_begin = keys.begin();
+  auto tp_end   = tp_begin + num_tp;
+  auto tn_begin = tp_end;
+  auto tn_end   = keys.end();
+
+  // Insert the first half of the keys.
+  filter.add(tp_begin, tp_end);
+
+  thrust::device_vector<bool> tp_result(num_tp, false);
+  thrust::device_vector<bool> tn_result(num_tn, false);
+
+  // Query the filter for the previously inserted keys.
+  // This should result in a true-positive rate of TPR=1.
+  filter.contains(tp_begin, tp_end, tp_result.begin());
+
+  // Query the filter for the keys that are not present in the filter.
+  // Since bloom filters are probabilistic data structures, the filter
+  // exhibits a false-positive rate FPR>0 depending on the number of bits in
+  // the filter and the number of hashes used per key.
+  filter.contains(tn_begin, tn_end, tn_result.begin());
+
+  // Fraction of queries answered with `true` in each half
+  float tp_rate =
+    float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp);
+  float fp_rate =
+    float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn);
+
+  std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl;
+
+  return 0;
+}
@@ -25,15 +25,21 @@
 
 int main(void)
 {
-  // Generate 10'000 keys and insert the first 5'000 into the filter.
-  int constexpr num_keys = 10'000;
-  int constexpr num_tp   = num_keys * 0.5;
-  int constexpr num_tn   = num_keys - num_tp;
+  int constexpr num_keys    = 10'000;          ///< Generate 10'000 keys
+  int constexpr num_tp      = num_keys * 0.5;  ///< Insert the first half keys into the filter.
+  int constexpr num_tn      = num_keys - num_tp;
+  int constexpr sub_filters = 200;  ///< 200 sub-filters per bloom filter
 
-  // Spawn a filter with 200 sub-filters.
-  cuco::bloom_filter<int> filter{200};
+  // key type for bloom filter
+  using key_type = int;
 
-  thrust::device_vector<int> keys(num_keys);
+  // Spawn a bloom filter with default policy and 200 sub-filters.
+  cuco::bloom_filter<key_type> filter{sub_filters};
+
+  std::cout << "Bulk insert into bloom filter with default fingerprint generation policy: "
+            << std::endl;
+
+  thrust::device_vector<key_type> keys(num_keys);
   thrust::sequence(keys.begin(), keys.end(), 1);
 
   auto tp_begin = keys.begin();

@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <cuco/bloom_filter_policy.cuh>
+#include <cuco/bloom_filter_policies.cuh>
 #include <cuco/bloom_filter_ref.cuh>
 #include <cuco/detail/storage/storage_base.cuh>
 #include <cuco/extent.cuh>
@@ -55,13 +55,13 @@ namespace cuco {
  * @tparam Extent Size type that is used to determine the number of blocks in the filter
  * @tparam Scope The scope in which operations will be performed by individual threads
  * @tparam Policy Type that defines how to generate and store key fingerprints (see
- * `cuco/bloom_filter_policy.cuh`)
+ * `cuco/bloom_filter_policies.cuh`)
  * @tparam Allocator Type of allocator used for device-accessible storage
  */
 template <class Key,
           class Extent             = cuco::extent<std::size_t>,
           cuda::thread_scope Scope = cuda::thread_scope_device,
-          class Policy    = cuco::bloom_filter_policy<cuco::xxhash_64<Key>, std::uint32_t, 8>,
+          class Policy    = cuco::default_filter_policy<cuco::xxhash_64<Key>, std::uint32_t, 8>,
           class Allocator = cuco::cuda_allocator<cuda::std::byte>>
 class bloom_filter {
  public:
@@ -109,7 +109,7 @@ class bloom_filter {
    *
    * @param num_blocks Number of sub-filters or blocks
    * @param scope The scope in which operations will be performed
-   * @param policy Fingerprint generation policy (see `cuco/bloom_filter_policy.cuh`)
+   * @param policy Fingerprint generation policy (see `cuco/bloom_filter_policies.cuh`)
    * @param alloc Allocator used for allocating device-accessible storage
    * @param stream CUDA stream used to initialize the filter
    */