From 94464303595ad4694e22469f99d5069874ff0e6d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 9 Feb 2024 15:04:01 +0100 Subject: [PATCH] Fix failing C++ tests and revert #2097, #2085. (#2168) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAFT C++ tests were not running for a portion of the 24.02 development cycle, until the merger of https://github.com/rapidsai/rapids-cmake/pull/533. This PR fixes some failing tests and reverts PRs that caused test failures that were silent until now, specifically #2097 and #2085. These features will be revisited in a subsequent release. Authors: - Malte Förster (https://github.com/mfoerste4) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) - Bradley Dice (https://github.com/bdice) --- cpp/CMakeLists.txt | 14 -- .../core/detail/mdspan_numpy_serializer.hpp | 14 +- cpp/include/raft/core/device_mdspan.hpp | 8 +- .../neighbors/detail/cagra/cagra_search.cuh | 2 +- .../cagra/search_multi_cta_kernel-ext.cuh | 238 +++++++++--------- .../detail/cagra/search_multi_kernel.cuh | 165 +++--------- .../neighbors/detail/cagra/search_plan.cuh | 10 +- .../cagra/search_single_cta_kernel-ext.cuh | 10 - .../detail/ivf_flat_interleaved_scan-ext.cuh | 4 - .../detail/ivf_flat_interleaved_scan-inl.cuh | 2 +- .../neighbors/detail/ivf_flat_search-ext.cuh | 4 - .../raft/neighbors/detail/ivf_pq_build.cuh | 8 +- .../raft/neighbors/detail/ivf_pq_search.cuh | 3 +- .../raft/neighbors/detail/refine_host-ext.hpp | 9 - cpp/include/raft/neighbors/ivf_pq-ext.cuh | 3 - cpp/include/raft/sparse/linalg/spmm.hpp | 9 +- cpp/include/raft/util/device_loads_stores.cuh | 144 ----------- cpp/include/raft/util/input_validation.hpp | 9 +- .../cagra/search_multi_cta_00_generate.py | 2 - ...earch_multi_cta_half_uint32_dim1024_t32.cu | 66 ----- .../search_multi_cta_half_uint32_dim128_t8.cu | 66 ----- ...search_multi_cta_half_uint32_dim256_t16.cu | 66 ----- ...search_multi_cta_half_uint32_dim512_t32.cu | 66 ----- ...earch_multi_cta_half_uint64_dim1024_t32.cu | 66 ----- .../search_multi_cta_half_uint64_dim128_t8.cu | 66 ----- ...search_multi_cta_half_uint64_dim256_t16.cu | 66 ----- ...search_multi_cta_half_uint64_dim512_t32.cu | 66 ----- .../cagra/search_single_cta_00_generate.py | 2 - ...arch_single_cta_half_uint32_dim1024_t32.cu | 67 ----- ...search_single_cta_half_uint32_dim128_t8.cu | 67 ----- ...earch_single_cta_half_uint32_dim256_t16.cu | 67 ----- ...earch_single_cta_half_uint32_dim512_t32.cu | 67 ----- ...arch_single_cta_half_uint64_dim1024_t32.cu | 67 ----- ...search_single_cta_half_uint64_dim128_t8.cu | 67 ----- ...earch_single_cta_half_uint64_dim256_t16.cu | 67 ----- ...earch_single_cta_half_uint64_dim512_t32.cu | 67 ----- ...flat_interleaved_scan_half_half_int64_t.cu | 44 ---- .../detail/refine_host_half_float.cpp | 31 --- cpp/src/neighbors/ivfpq_build_half_int64_t.cu | 38 --- .../neighbors/ivfpq_extend_half_int64_t.cu | 52 ---- .../neighbors/ivfpq_search_half_int64_t.cu | 44 ---- cpp/src/neighbors/refine_00_generate.py | 1 - cpp/src/neighbors/refine_half_float.cu | 50 ---- cpp/src/raft_runtime/neighbors/cagra_build.cu | 3 - .../raft_runtime/neighbors/cagra_search.cu | 3 - .../raft_runtime/neighbors/cagra_serialize.cu | 3 - cpp/test/CMakeLists.txt | 10 - cpp/test/neighbors/ann_cagra.cuh | 75 +----- .../neighbors/ann_cagra/test_half_int64_t.cu | 29 --- .../neighbors/ann_cagra/test_half_uint32_t.cu | 40 --- 50 files changed, 184 insertions(+), 1963 deletions(-) delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu delete mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu delete mode 100644 cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu delete mode 100644 cpp/src/neighbors/detail/refine_host_half_float.cpp delete mode 100644 cpp/src/neighbors/ivfpq_build_half_int64_t.cu delete mode 100644 cpp/src/neighbors/ivfpq_extend_half_int64_t.cu delete mode 100644 cpp/src/neighbors/ivfpq_search_half_int64_t.cu delete mode 100644 cpp/src/neighbors/refine_half_float.cu delete mode 100644 cpp/test/neighbors/ann_cagra/test_half_int64_t.cu delete mode 100644 cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 36c75a686d..7670065728 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -346,10 +346,6 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -362,10 +358,6 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -375,7 +367,6 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu src/neighbors/detail/ivf_flat_search.cu @@ -387,7 +378,6 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu src/neighbors/detail/refine_host_float_float.cpp - src/neighbors/detail/refine_host_half_float.cpp src/neighbors/detail/refine_host_int8_t_float.cpp src/neighbors/detail/refine_host_uint8_t_float.cpp src/neighbors/ivf_flat_build_float_int64_t.cu @@ -400,19 +390,15 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/ivf_flat_search_int8_t_int64_t.cu src/neighbors/ivf_flat_search_uint8_t_int64_t.cu src/neighbors/ivfpq_build_float_int64_t.cu - src/neighbors/ivfpq_build_half_int64_t.cu src/neighbors/ivfpq_build_int8_t_int64_t.cu src/neighbors/ivfpq_build_uint8_t_int64_t.cu src/neighbors/ivfpq_extend_float_int64_t.cu - src/neighbors/ivfpq_extend_half_int64_t.cu src/neighbors/ivfpq_extend_int8_t_int64_t.cu src/neighbors/ivfpq_extend_uint8_t_int64_t.cu src/neighbors/ivfpq_search_float_int64_t.cu - src/neighbors/ivfpq_search_half_int64_t.cu src/neighbors/ivfpq_search_int8_t_int64_t.cu src/neighbors/ivfpq_search_uint8_t_int64_t.cu src/neighbors/refine_float_float.cu - src/neighbors/refine_half_float.cu src/neighbors/refine_int8_t_float.cu src/neighbors/refine_uint8_t_float.cu src/raft_runtime/cluster/cluster_cost.cuh diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 176309c8ce..168dabc990 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -20,10 +20,6 @@ #include #include -#if defined(_RAFT_HAS_CUDA) -#include -#endif - #include #include #include @@ -125,14 +121,6 @@ inline dtype_t get_numpy_dtype() return {RAFT_NUMPY_HOST_ENDIAN_CHAR, 'f', sizeof(T)}; } -#if defined(_RAFT_HAS_CUDA) -template , bool> = true> -inline dtype_t get_numpy_dtype() -{ - return {RAFT_NUMPY_HOST_ENDIAN_CHAR, 'e', sizeof(T)}; -} -#endif - template && std::is_signed_v, bool> = true> inline dtype_t get_numpy_dtype() @@ -285,7 +273,7 @@ inline dtype_t parse_descr(std::string typestr) const char endian_chars[] = { RAFT_NUMPY_LITTLE_ENDIAN_CHAR, RAFT_NUMPY_BIG_ENDIAN_CHAR, RAFT_NUMPY_NO_ENDIAN_CHAR}; - const char numtype_chars[] = {'f', 'i', 'u', 'c', 'e'}; + const char numtype_chars[] = {'f', 'i', 'u', 'c'}; RAFT_EXPECTS(std::find(std::begin(endian_chars), std::end(endian_chars), byteorder_c) != std::end(endian_chars), diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp index 3b6165b86a..7988bd3f6f 100644 --- a/cpp/include/raft/core/device_mdspan.hpp +++ b/cpp/include/raft/core/device_mdspan.hpp @@ -207,8 +207,12 @@ auto constexpr make_device_strided_matrix_view(ElementType* ptr, IndexType stride) { constexpr auto is_row_major = std::is_same_v; - IndexType stride0 = is_row_major ? (stride > 0 ? stride : n_cols) : 1; - IndexType stride1 = is_row_major ? 1 : (stride > 0 ? stride : n_rows); + constexpr auto is_col_major = std::is_same_v; + + assert(is_row_major || is_col_major); + + IndexType stride0 = is_row_major ? (stride > 0 ? stride : n_cols) : 1; + IndexType stride1 = is_row_major ? 1 : (stride > 0 ? stride : n_rows); assert(is_row_major ? stride0 >= n_cols : stride1 >= n_rows); matrix_extent extents{n_rows, n_cols}; diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 40cc7c76fb..41a43c9bce 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -131,7 +131,7 @@ void search_main(raft::resources const& res, factory::create( res, params, index.dim(), index.graph_degree(), topk); - plan->check(topk); + plan->check(neighbors.extent(1)); RAFT_LOG_DEBUG("Cagra search"); const uint32_t max_queries = plan->max_queries; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 7a5ad17460..3179586b98 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -1,124 +1,114 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // none_cagra_sample_filter -#include // RAFT_EXPLICIT - -#include - -namespace raft::neighbors::cagra::detail { -namespace multi_cta_search { - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -template -void select_and_run(raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, - DISTANCE_T* const topk_distances_ptr, - const DATA_T* const queries_ptr, - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, - uint32_t* const num_executed_iterations, - uint32_t topk, - uint32_t block_size, - uint32_t result_buffer_size, - uint32_t smem_size, - int64_t hash_bitlen, - INDEX_T* hashmap_ptr, - uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - cudaStream_t stream) RAFT_EXPLICIT; -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection -} // namespace multi_cta_search -} // namespace raft::neighbors::cagra::detail +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include // none_cagra_sample_filter +#include // RAFT_EXPLICIT + +namespace raft::neighbors::cagra::detail { +namespace multi_cta_search { + +#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY + +template +void select_and_run(raft::device_matrix_view dataset, + raft::device_matrix_view graph, + INDEX_T* const topk_indices_ptr, + DISTANCE_T* const topk_distances_ptr, + const DATA_T* const queries_ptr, + const uint32_t num_queries, + const INDEX_T* dev_seed_ptr, + uint32_t* const num_executed_iterations, + uint32_t topk, + uint32_t block_size, + uint32_t result_buffer_size, + uint32_t smem_size, + int64_t hash_bitlen, + INDEX_T* hashmap_ptr, + uint32_t num_cta_per_query, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SAMPLE_FILTER_T sample_filter, + cudaStream_t stream) RAFT_EXPLICIT; +#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY + +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection +} // namespace multi_cta_search +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index f9bf525503..e302dddedf 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -37,7 +37,6 @@ #include "topk_for_cagra/topk_core.cuh" //todo replace with raft kernel #include "utils.hpp" #include -#include #include #include // RAFT_CUDA_TRY_NOT_THROW is used TODO(tfeher): consider moving this to cuda_rt_essentials.hpp @@ -654,12 +653,6 @@ struct search : search_plan_impl { rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; rmm::device_uvector topk_workspace; - // temporary storage for _find_topk - rmm::device_uvector input_keys_storage; - rmm::device_uvector output_keys_storage; - rmm::device_uvector input_values_storage; - rmm::device_uvector output_values_storage; - search(raft::resources const& res, search_params params, int64_t dim, @@ -672,11 +665,7 @@ struct search : search_plan_impl { parent_node_list(0, resource::get_cuda_stream(res)), topk_hint(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)), - terminate_flag(resource::get_cuda_stream(res)), - input_keys_storage(0, resource::get_cuda_stream(res)), - output_keys_storage(0, resource::get_cuda_stream(res)), - input_values_storage(0, resource::get_cuda_stream(res)), - output_values_storage(0, resource::get_cuda_stream(res)) + terminate_flag(resource::get_cuda_stream(res)) { set_params(res); } @@ -706,98 +695,6 @@ struct search : search_plan_impl { ~search() {} - inline void _find_topk(raft::resources const& handle, - uint32_t topK, - uint32_t sizeBatch, - uint32_t numElements, - const float* inputKeys, // [sizeBatch, ldIK,] - uint32_t ldIK, // (*) ldIK >= numElements - const INDEX_T* inputVals, // [sizeBatch, ldIV,] - uint32_t ldIV, // (*) ldIV >= numElements - float* outputKeys, // [sizeBatch, ldOK,] - uint32_t ldOK, // (*) ldOK >= topK - INDEX_T* outputVals, // [sizeBatch, ldOV,] - uint32_t ldOV, // (*) ldOV >= topK - void* workspace, - bool sort, - uint32_t* hints) - { - auto stream = resource::get_cuda_stream(handle); - - // _cuann_find_topk right now is limited to a max-k of 1024. - // RAFT has a matrix::select_k function - which handles arbitrary sized values of k, - // but doesn't accept strided inputs unlike _cuann_find_topk - // The multi-kernel search path requires strided access - since its cleverly allocating memory - // (layout described in the search_plan_impl function below), such that both the - // neighbors and the internal_topk are adjacent - in a double buffered format. - // Since this layout doesn't work with the matrix::select_k code - we have to copy - // over to a contiguous (non-strided) access to handle topk larger than 1024, and - // potentially also copy back to a strided layout afterwards - if (topK <= 1024) { - return _cuann_find_topk(topK, - sizeBatch, - numElements, - inputKeys, - ldIK, - inputVals, - ldIV, - outputKeys, - ldOK, - outputVals, - ldOV, - workspace, - sort, - hints, - stream); - } - - if (ldIK > numElements) { - if (input_keys_storage.size() != sizeBatch * numElements) { - input_keys_storage.resize(sizeBatch * numElements, stream); - } - batched_memcpy( - input_keys_storage.data(), numElements, inputKeys, ldIK, numElements, sizeBatch, stream); - inputKeys = input_keys_storage.data(); - } - - if (ldIV > numElements) { - if (input_values_storage.size() != sizeBatch * numElements) { - input_values_storage.resize(sizeBatch * numElements, stream); - } - - batched_memcpy( - input_values_storage.data(), numElements, inputVals, ldIV, numElements, sizeBatch, stream); - inputVals = input_values_storage.data(); - } - - if ((ldOK > topK) && (output_keys_storage.size() != sizeBatch * topK)) { - output_keys_storage.resize(sizeBatch * topK, stream); - } - - if ((ldOV > topK) && (output_values_storage.size() != sizeBatch * topK)) { - output_values_storage.resize(sizeBatch * topK, stream); - } - - raft::matrix::select_k( - handle, - raft::make_device_matrix_view(inputKeys, sizeBatch, numElements), - raft::make_device_matrix_view(inputVals, sizeBatch, numElements), - raft::make_device_matrix_view( - ldOK > topK ? output_keys_storage.data() : outputKeys, sizeBatch, topK), - raft::make_device_matrix_view( - ldOV > topK ? output_values_storage.data() : outputVals, sizeBatch, topK), - true, // select_min - sort); - - if (ldOK > topK) { - batched_memcpy(outputKeys, ldOK, output_keys_storage.data(), topK, topK, sizeBatch, stream); - } - - if (ldOV > topK) { - batched_memcpy(outputVals, ldOV, output_values_storage.data(), topK, topK, sizeBatch, stream); - } - } - void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -849,21 +746,21 @@ struct search : search_plan_impl { unsigned iter = 0; while (1) { // Make an index list of internal top-k nodes - _find_topk(res, - itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr); + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr, + stream); // termination (1) if ((iter + 1 == max_iterations)) { @@ -944,21 +841,21 @@ struct search : search_plan_impl { result_indices_ptr = result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size; result_distances_ptr = result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size; - _find_topk(res, - itopk_size, - num_queries, - result_buffer_size, - result_distances.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_indices.data() + (iter & 0x1) * itopk_size, - result_buffer_allocation_size, - result_distances_ptr, - result_buffer_allocation_size, - result_indices_ptr, - result_buffer_allocation_size, - topk_workspace.data(), - true, - top_hint_ptr); + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances_ptr, + result_buffer_allocation_size, + result_indices_ptr, + result_buffer_allocation_size, + topk_workspace.data(), + true, + top_hint_ptr, + stream); } else { // Remove parent bit in search results remove_parent_bit( diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 271a1f4955..20df2adf61 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -42,12 +42,9 @@ struct search_plan_impl_base : public search_params { if (itopk_size <= 512 && search_params::max_queries >= num_sm * 2lu) { algo = search_algo::SINGLE_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting single-cta"); - } else if (topk <= 1024) { + } else { algo = search_algo::MULTI_CTA; RAFT_LOG_DEBUG("Auto strategy: selecting multi-cta"); - } else { - algo = search_algo::MULTI_KERNEL; - RAFT_LOG_DEBUG("Auto strategy: selecting multi kernel"); } } } @@ -258,8 +255,7 @@ struct search_plan_impl : public search_plan_impl_base { virtual void check(const uint32_t topk) { // For single-CTA and multi kernel - RAFT_EXPECTS( - topk <= itopk_size, "topk = %u must be smaller than itopk_size = %lu", topk, itopk_size); + RAFT_EXPECTS(topk <= itopk_size, "topk must be smaller than itopk_size = %lu", itopk_size); } inline void check_params() @@ -267,7 +263,7 @@ struct search_plan_impl : public search_plan_impl_base { std::string error_message = ""; if (itopk_size > 1024) { - if ((algo == search_algo::MULTI_CTA) || (algo == search_algo::MULTI_KERNEL)) { + if (algo == search_algo::MULTI_CTA) { } else { error_message += std::string("- `internal_topk` (" + std::to_string(itopk_size) + ") must be smaller or equal to 1024"); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index fef060ffee..99e2fbede2 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -18,8 +18,6 @@ #include #include // RAFT_EXPLICIT -#include - namespace raft::neighbors::cagra::detail { namespace single_cta_search { @@ -98,14 +96,6 @@ instantiate_single_cta_select_and_run( 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); instantiate_single_cta_select_and_run( 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); instantiate_single_cta_select_and_run( 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); instantiate_single_cta_select_and_run( diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh index 58e94ee7aa..6bd8c91157 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh @@ -22,8 +22,6 @@ #include // RAFT_EXPLICIT #include // rmm:cuda_stream_view -#include - #ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY namespace raft::neighbors::ivf_flat::detail { @@ -69,8 +67,6 @@ void ivfflat_interleaved_scan(const raft::neighbors::ivf_flat::index& i instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( float, float, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - half, half, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( int8_t, int32_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh index 1cf042c6cd..6e2821f3de 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh @@ -168,7 +168,7 @@ struct loadAndComputeDist { const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks) { const int loadDim = dimBlocks + lane_id; - T queryReg = loadDim < dim ? query[loadDim] : T{0}; + T queryReg = loadDim < dim ? query[loadDim] : 0; const int loadDataIdx = lane_id * Veclen; for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) { T enc[Veclen]; diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh index d7dfe5c363..a4d885aee9 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh @@ -21,8 +21,6 @@ #include // none_ivf_sample_filter #include // RAFT_EXPLICIT -#include - #ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY namespace raft::neighbors::ivf_flat::detail { @@ -58,8 +56,6 @@ void search(raft::resources const& handle, instantiate_raft_neighbors_ivf_flat_detail_search( float, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_search( - half, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); instantiate_raft_neighbors_ivf_flat_detail_search( int8_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); instantiate_raft_neighbors_ivf_flat_detail_search( diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 2dfb261f32..80033c384b 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -54,8 +54,6 @@ #include #include -#include - #include #include @@ -1564,8 +1562,7 @@ void extend(raft::resources const& handle, RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, "You must pass data indices when the index is non-empty."); - static_assert(std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v, + static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported data type"); rmm::mr::device_memory_resource* device_memory = raft::resource::get_workspace_resource(handle); @@ -1737,8 +1734,7 @@ auto build(raft::resources const& handle, { common::nvtx::range fun_scope( "ivf_pq::build(%zu, %u)", size_t(n_rows), dim); - static_assert(std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v, + static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported data type"); RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh index 129f9d6ecf..3d33bfacf7 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh @@ -738,8 +738,7 @@ inline void search(raft::resources const& handle, float* distances, IvfSampleFilterT sample_filter = IvfSampleFilterT()) { - static_assert(std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v, + static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported element type."); common::nvtx::range fun_scope( "ivf_pq::search(n_queries = %u, n_probes = %u, k = %u, dim = %zu)", diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index a2f75d0f8b..d807ac5239 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -18,15 +18,10 @@ #include // int64_t -#include // _RAFT_HAS_CUDA #include // raft::host_matrix_view #include // raft::distance::DistanceType #include // RAFT_EXPLICIT -#if defined(_RAFT_HAS_CUDA) -#include -#endif - #ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY namespace raft::neighbors::detail { @@ -57,8 +52,4 @@ instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); -#if defined(_RAFT_HAS_CUDA) -instantiate_raft_neighbors_refine(int64_t, half, float, int64_t); -#endif - #undef instantiate_raft_neighbors_refine diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh index 188219dfa2..038b2e7263 100644 --- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh @@ -150,7 +150,6 @@ search(raft::resources const& handle, ->raft::neighbors::ivf_pq::index; instantiate_raft_neighbors_ivf_pq_build(float, int64_t); -instantiate_raft_neighbors_ivf_pq_build(half, int64_t); instantiate_raft_neighbors_ivf_pq_build(int8_t, int64_t); instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t); @@ -185,7 +184,6 @@ instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t); IdxT n_rows); instantiate_raft_neighbors_ivf_pq_extend(float, int64_t); -instantiate_raft_neighbors_ivf_pq_extend(half, int64_t); instantiate_raft_neighbors_ivf_pq_extend(int8_t, int64_t); instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t); @@ -222,7 +220,6 @@ instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t); float* distances) instantiate_raft_neighbors_ivf_pq_search(float, int64_t); -instantiate_raft_neighbors_ivf_pq_search(half, int64_t); instantiate_raft_neighbors_ivf_pq_search(int8_t, int64_t); instantiate_raft_neighbors_ivf_pq_search(uint8_t, int64_t); diff --git a/cpp/include/raft/sparse/linalg/spmm.hpp b/cpp/include/raft/sparse/linalg/spmm.hpp index 03c97fdb9d..1e815ba521 100644 --- a/cpp/include/raft/sparse/linalg/spmm.hpp +++ b/cpp/include/raft/sparse/linalg/spmm.hpp @@ -60,8 +60,11 @@ void spmm(raft::resources const& handle, { bool is_row_major = detail::is_row_major(y, z); - auto z_tmp_view = raft::make_device_strided_matrix_view( - z.data_handle(), z.extent(0), z.extent(1), is_row_major ? z.stride(0) : z.stride(1)); + auto z_tmp_view = + is_row_major ? raft::make_device_strided_matrix_view( + z.data_handle(), z.extent(0), z.extent(1), z.stride(0)) + : raft::make_device_strided_matrix_view( + z.data_handle(), z.extent(0), z.extent(1), z.stride(1)); auto descr_x = detail::create_descriptor(x); auto descr_y = detail::create_descriptor(y); @@ -79,4 +82,4 @@ void spmm(raft::resources const& handle, } // end namespace sparse } // end namespace raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index 780939ee85..0b5b937245 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -17,7 +17,6 @@ #pragma once #include // uintX_t -#include #include #include // DI @@ -149,57 +148,6 @@ DI void sts(int32_t* addr, const int32_t (&x)[4]) : "l"(s4), "r"(x[0]), "r"(x[1]), "r"(x[2]), "r"(x[3])); } -DI void sts(half* addr, const half& x) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("st.shared.u16 [%0], {%1};" : : "l"(s), "h"(*reinterpret_cast(&x))); -} -DI void sts(half* addr, const half (&x)[1]) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("st.shared.u16 [%0], {%1};" : : "l"(s), "h"(*reinterpret_cast(x))); -} -DI void sts(half* addr, const half (&x)[2]) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("st.shared.v2.u16 [%0], {%1, %2};" - : - : "l"(s), - "h"(*reinterpret_cast(x)), - "h"(*reinterpret_cast(x + 1))); -} -DI void sts(half* addr, const half (&x)[4]) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("st.shared.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(s), - "h"(*reinterpret_cast(x)), - "h"(*reinterpret_cast(x + 1)), - "h"(*reinterpret_cast(x + 2)), - "h"(*reinterpret_cast(x + 3))); -} -DI void sts(half* addr, const half (&x)[8]) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - half2 y[4]; - y[0].x = x[0]; - y[0].y = x[1]; - y[1].x = x[2]; - y[1].y = x[3]; - y[2].x = x[4]; - y[2].y = x[5]; - y[3].x = x[6]; - y[3].y = x[7]; - asm volatile("st.shared.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(s), - "r"(*reinterpret_cast(y)), - "r"(*reinterpret_cast(y + 1)), - "r"(*reinterpret_cast(y + 2)), - "r"(*reinterpret_cast(y + 3))); -} - DI void sts(float* addr, const float& x) { auto s1 = __cvta_generic_to_shared(reinterpret_cast(addr)); @@ -375,52 +323,6 @@ DI void lds(int32_t& x, const int32_t* addr) asm volatile("ld.shared.u32 {%0}, [%1];" : "=r"(x) : "l"(s1)); } -DI void lds(half& x, const half* addr) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("ld.shared.u16 {%0}, [%1];" : "=h"(*reinterpret_cast(&x)) : "l"(s)); -} -DI void lds(half (&x)[1], const half* addr) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("ld.shared.u16 {%0}, [%1];" : "=h"(*reinterpret_cast(x)) : "l"(s)); -} -DI void lds(half (&x)[2], const half* addr) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("ld.shared.v2.u16 {%0, %1}, [%2];" - : "=h"(*reinterpret_cast(x)), "=h"(*reinterpret_cast(x + 1)) - : "l"(s)); -} -DI void lds(half (&x)[4], const half* addr) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - asm volatile("ld.shared.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(*reinterpret_cast(x)), - "=h"(*reinterpret_cast(x + 1)), - "=h"(*reinterpret_cast(x + 2)), - "=h"(*reinterpret_cast(x + 3)) - : "l"(s)); -} -DI void lds(half (&x)[8], const half* addr) -{ - auto s = __cvta_generic_to_shared(reinterpret_cast(addr)); - half2 y[4]; - asm volatile("ld.shared.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(*reinterpret_cast(y)), - "=r"(*reinterpret_cast(y + 1)), - "=r"(*reinterpret_cast(y + 2)), - "=r"(*reinterpret_cast(y + 3)) - : "l"(s)); - x[0] = y[0].x; - x[1] = y[0].y; - x[2] = y[1].x; - x[3] = y[1].y; - x[4] = y[2].x; - x[5] = y[2].y; - x[6] = y[3].x; - x[7] = y[3].y; -} DI void lds(float& x, const float* addr) { auto s1 = __cvta_generic_to_shared(reinterpret_cast(addr)); @@ -508,52 +410,6 @@ DI void ldg(float (&x)[4], const float* addr) : "=f"(x[0]), "=f"(x[1]), "=f"(x[2]), "=f"(x[3]) : "l"(addr)); } -DI void ldg(half& x, const half* addr) -{ - asm volatile("ld.global.cg.u16 {%0}, [%1];" - : "=h"(*reinterpret_cast(&x)) - : "l"(reinterpret_cast(addr))); -} -DI void ldg(half (&x)[1], const half* addr) -{ - asm volatile("ld.global.cg.u16 {%0}, [%1];" - : "=h"(*reinterpret_cast(x)) - : "l"(reinterpret_cast(addr))); -} -DI void ldg(half (&x)[2], const half* addr) -{ - asm volatile("ld.global.cg.v2.u16 {%0, %1}, [%2];" - : "=h"(*reinterpret_cast(x)), "=h"(*reinterpret_cast(x + 1)) - : "l"(reinterpret_cast(addr))); -} -DI void ldg(half (&x)[4], const half* addr) -{ - asm volatile("ld.global.cg.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(*reinterpret_cast(x)), - "=h"(*reinterpret_cast(x + 1)), - "=h"(*reinterpret_cast(x + 2)), - "=h"(*reinterpret_cast(x + 3)) - : "l"(reinterpret_cast(addr))); -} - -DI void ldg(half (&x)[8], const half* addr) -{ - half2 y[4]; - asm volatile("ld.global.cg.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(*reinterpret_cast(y)), - "=r"(*reinterpret_cast(y + 1)), - "=r"(*reinterpret_cast(y + 2)), - "=r"(*reinterpret_cast(y + 3)) - : "l"(reinterpret_cast(addr))); - x[0] = y[0].x; - x[1] = y[0].y; - x[2] = y[1].x; - x[3] = y[1].y; - x[4] = y[2].x; - x[5] = y[2].y; - x[6] = y[3].x; - x[7] = y[3].y; -} DI void ldg(double& x, const double* addr) { asm volatile("ld.global.cg.f64 %0, [%1];" : "=d"(x) : "l"(addr)); diff --git a/cpp/include/raft/util/input_validation.hpp b/cpp/include/raft/util/input_validation.hpp index ab5264f900..1977b45281 100644 --- a/cpp/include/raft/util/input_validation.hpp +++ b/cpp/include/raft/util/input_validation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,8 @@ constexpr bool is_row_or_column_major(mdspan constexpr bool is_row_or_column_major(mdspan m) { - return m.is_exhaustive(); + return m.stride(0) == typename Extents::index_type(1) || + m.stride(1) == typename Extents::index_type(1); } template @@ -63,7 +64,7 @@ constexpr bool is_row_major(mdspan template constexpr bool is_row_major(mdspan m) { - return m.is_exhaustive() && m.stride(1) == typename Extents::index_type(1); + return m.stride(1) == typename Extents::index_type(1); } template @@ -87,7 +88,7 @@ constexpr bool is_col_major(mdspan template constexpr bool is_col_major(mdspan m) { - return m.is_exhaustive() && m.stride(0) == typename Extents::index_type(1); + return m.stride(0) == typename Extents::index_type(1); } template diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 6f8766c86b..3bf2cba8f2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -90,11 +90,9 @@ "uint32_t", "float", ), # data_t, vec_idx_t, distance_t - half_uint32=("half", "uint32_t", "float"), int8_uint32=("int8_t", "uint32_t", "float"), uint8_uint32=("uint8_t", "uint32_t", "float"), float_uint64=("float", "uint64_t", "float"), - half_uint64=("half", "uint64_t", "float"), ) # knn for type_path, (data_t, idx_t, distance_t) in search_types.items(): diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu deleted file mode 100644 index fa89bca45f..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu deleted file mode 100644 index 645ca61ff5..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu deleted file mode 100644 index 41b6f9b420..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu deleted file mode 100644 index 38f0ac3b04..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu deleted file mode 100644 index c462a9d359..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu deleted file mode 100644 index f5b2874e20..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu deleted file mode 100644 index 0b01428b86..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 16, 256, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu deleted file mode 100644 index 70228a129d..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ /dev/null @@ -1,66 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_multi_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_multi_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 1515f43134..d1d976ee87 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -93,11 +93,9 @@ search_types = dict( float_uint32=("float", "uint32_t", "float"), # data_t, idx_t, distance_t - half_uint32=("half", "uint32_t", "float"), int8_uint32=("int8_t", "uint32_t", "float"), uint8_uint32=("uint8_t", "uint32_t", "float"), float_uint64=("float", "uint64_t", "float"), - half_uint64=("half", "uint64_t", "float"), ) # knn diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu deleted file mode 100644 index 29e7bfa250..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu deleted file mode 100644 index a004f900d0..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu deleted file mode 100644 index 549849b21d..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu deleted file mode 100644 index 3825f572f7..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu deleted file mode 100644 index 31d83f443b..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu deleted file mode 100644 index 3493ab294c..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu deleted file mode 100644 index 6e09709994..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 16, 256, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu deleted file mode 100644 index 4bc0158f7e..0000000000 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ /dev/null @@ -1,67 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by search_single_cta_00_generate.py - * - * Make changes there and run in this directory: - * - * > python search_single_cta_00_generate.py - * - */ - -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel - -} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu deleted file mode 100644 index 3c467a12d8..0000000000 --- a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include - -#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( \ - T, AccT, IdxT, IvfSampleFilterT) \ - template void \ - raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan( \ - const raft::neighbors::ivf_flat::index& index, \ - const T* queries, \ - const uint32_t* coarse_query_results, \ - const uint32_t n_queries, \ - const uint32_t queries_offset, \ - const raft::distance::DistanceType metric, \ - const uint32_t n_probes, \ - const uint32_t k, \ - const bool select_min, \ - IvfSampleFilterT sample_filter, \ - IdxT* neighbors, \ - float* distances, \ - uint32_t& grid_dim_x, \ - rmm::cuda_stream_view stream) - -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - half, half, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); - -#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan diff --git a/cpp/src/neighbors/detail/refine_host_half_float.cpp b/cpp/src/neighbors/detail/refine_host_half_float.cpp deleted file mode 100644 index d9fb2864fe..0000000000 --- a/cpp/src/neighbors/detail/refine_host_half_float.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include - -#define instantiate_raft_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - template void raft::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - distance::DistanceType metric); - -instantiate_raft_neighbors_refine(int64_t, half, float, int64_t); - -#undef instantiate_raft_neighbors_refine diff --git a/cpp/src/neighbors/ivfpq_build_half_int64_t.cu b/cpp/src/neighbors/ivfpq_build_half_int64_t.cu deleted file mode 100644 index aacb2d7198..0000000000 --- a/cpp/src/neighbors/ivfpq_build_half_int64_t.cu +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include // raft::neighbors::ivf_pq::index - -#include - -#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT) \ - template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset); \ - \ - template auto raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - const T* dataset, \ - IdxT n_rows, \ - uint32_t dim) \ - ->raft::neighbors::ivf_pq::index; - -instantiate_raft_neighbors_ivf_pq_build(half, int64_t); - -#undef instantiate_raft_neighbors_ivf_pq_build diff --git a/cpp/src/neighbors/ivfpq_extend_half_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_half_int64_t.cu deleted file mode 100644 index 85477ca4a0..0000000000 --- a/cpp/src/neighbors/ivfpq_extend_half_int64_t.cu +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include // raft::neighbors::ivf_pq::index - -#include - -#define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT) \ - template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::extend( \ - raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const raft::neighbors::ivf_pq::index& idx); \ - \ - template void raft::neighbors::ivf_pq::extend( \ - raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - raft::neighbors::ivf_pq::index* idx); \ - \ - template auto raft::neighbors::ivf_pq::extend( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index& idx, \ - const T* new_vectors, \ - const IdxT* new_indices, \ - IdxT n_rows) \ - ->raft::neighbors::ivf_pq::index; \ - \ - template void raft::neighbors::ivf_pq::extend( \ - raft::resources const& handle, \ - raft::neighbors::ivf_pq::index* idx, \ - const T* new_vectors, \ - const IdxT* new_indices, \ - IdxT n_rows); - -instantiate_raft_neighbors_ivf_pq_extend(half, int64_t); - -#undef instantiate_raft_neighbors_ivf_pq_extend diff --git a/cpp/src/neighbors/ivfpq_search_half_int64_t.cu b/cpp/src/neighbors/ivfpq_search_half_int64_t.cu deleted file mode 100644 index c9f2e6fdd5..0000000000 --- a/cpp/src/neighbors/ivfpq_search_half_int64_t.cu +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include // raft::neighbors::ivf_pq::index - -#include - -#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT) \ - template void raft::neighbors::ivf_pq::search( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::search_params& params, \ - const raft::neighbors::ivf_pq::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances); \ - \ - template void raft::neighbors::ivf_pq::search( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::search_params& params, \ - const raft::neighbors::ivf_pq::index& idx, \ - const T* queries, \ - uint32_t n_queries, \ - uint32_t k, \ - IdxT* neighbors, \ - float* distances, \ - rmm::mr::device_memory_resource* mr) - -instantiate_raft_neighbors_ivf_pq_search(half, int64_t); - -#undef instantiate_raft_neighbors_ivf_pq_search diff --git a/cpp/src/neighbors/refine_00_generate.py b/cpp/src/neighbors/refine_00_generate.py index fd11f4d5c3..ef51eae437 100644 --- a/cpp/src/neighbors/refine_00_generate.py +++ b/cpp/src/neighbors/refine_00_generate.py @@ -63,7 +63,6 @@ types = dict( float_float= ("float", "float"), - half_float= ("half", "float"), int8_t_float=("int8_t", "float"), uint8_t_float=("uint8_t", "float"), ) diff --git a/cpp/src/neighbors/refine_half_float.cu b/cpp/src/neighbors/refine_half_float.cu deleted file mode 100644 index c323951b82..0000000000 --- a/cpp/src/neighbors/refine_half_float.cu +++ /dev/null @@ -1,50 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by refine_00_generate.py - * - * Make changes there and run in this directory: - * - * > python refine_00_generate.py - * - */ - -#include - -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ - template void raft::neighbors::refine( \ - raft::resources const& handle, \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbor_candidates, \ - raft::device_matrix_view indices, \ - raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ - template void raft::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - raft::distance::DistanceType metric); - -instantiate_raft_neighbors_refine(int64_t, half, float, int64_t); - -#undef instantiate_raft_neighbors_refine diff --git a/cpp/src/raft_runtime/neighbors/cagra_build.cu b/cpp/src/raft_runtime/neighbors/cagra_build.cu index 5fc472c952..fb139f22a6 100644 --- a/cpp/src/raft_runtime/neighbors/cagra_build.cu +++ b/cpp/src/raft_runtime/neighbors/cagra_build.cu @@ -19,8 +19,6 @@ #include #include -#include - namespace raft::runtime::neighbors::cagra { #define RAFT_INST_CAGRA_BUILD(T, IdxT) \ @@ -57,7 +55,6 @@ namespace raft::runtime::neighbors::cagra { } RAFT_INST_CAGRA_BUILD(float, uint32_t); -RAFT_INST_CAGRA_BUILD(half, uint32_t); RAFT_INST_CAGRA_BUILD(int8_t, uint32_t); RAFT_INST_CAGRA_BUILD(uint8_t, uint32_t); diff --git a/cpp/src/raft_runtime/neighbors/cagra_search.cu b/cpp/src/raft_runtime/neighbors/cagra_search.cu index 93244077f4..25a249bd76 100644 --- a/cpp/src/raft_runtime/neighbors/cagra_search.cu +++ b/cpp/src/raft_runtime/neighbors/cagra_search.cu @@ -17,8 +17,6 @@ #include #include -#include - namespace raft::runtime::neighbors::cagra { #define RAFT_INST_CAGRA_SEARCH(T, IdxT) \ @@ -33,7 +31,6 @@ namespace raft::runtime::neighbors::cagra { } RAFT_INST_CAGRA_SEARCH(float, uint32_t); -RAFT_INST_CAGRA_SEARCH(half, uint32_t); RAFT_INST_CAGRA_SEARCH(int8_t, uint32_t); RAFT_INST_CAGRA_SEARCH(uint8_t, uint32_t); diff --git a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu index f386bcce8e..bf8e7bf6ee 100644 --- a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu +++ b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu @@ -22,8 +22,6 @@ #include #include -#include - namespace raft::runtime::neighbors::cagra { #define RAFT_INST_CAGRA_SERIALIZE(DTYPE) \ @@ -77,7 +75,6 @@ namespace raft::runtime::neighbors::cagra { } RAFT_INST_CAGRA_SERIALIZE(float); -RAFT_INST_CAGRA_SERIALIZE(half); RAFT_INST_CAGRA_SERIALIZE(int8_t); RAFT_INST_CAGRA_SERIALIZE(uint8_t); diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index fe29409d9b..931530b66a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -362,11 +362,9 @@ if(BUILD_TESTS) NEIGHBORS_ANN_CAGRA_TEST PATH test/neighbors/ann_cagra/test_float_uint32_t.cu - test/neighbors/ann_cagra/test_half_uint32_t.cu test/neighbors/ann_cagra/test_int8_t_uint32_t.cu test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu test/neighbors/ann_cagra/test_float_int64_t.cu - test/neighbors/ann_cagra/test_half_int64_t.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -375,14 +373,6 @@ if(BUILD_TESTS) src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu LIB EXPLICIT_INSTANTIATE_ONLY GPUS diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index ef4f27ae64..21aed0b992 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -40,8 +40,6 @@ #include -#include - #include #include #include @@ -112,69 +110,42 @@ testing::AssertionResult CheckOrder(raft::host_matrix_view index_ return testing::AssertionSuccess(); } -template -struct fpi_mapper {}; - -template <> -struct fpi_mapper { - using type = int64_t; - static constexpr int kBitshiftBase = 53; -}; - -template <> -struct fpi_mapper { - using type = int32_t; - static constexpr int kBitshiftBase = 24; -}; - -template <> -struct fpi_mapper { - using type = int16_t; - static constexpr int kBitshiftBase = 11; -}; - // Generate dataset to ensure no rounding error occurs in the norm computation of any two vectors. // When testing the CAGRA index sorting function, rounding errors can affect the norm and alter the // order of the index. To ensure the accuracy of the test, we utilize the dataset. The generation // method is based on the error-free transformation (EFT) method. -template -RAFT_KERNEL GenerateRoundingErrorFreeDataset_kernel(T* const ptr, +RAFT_KERNEL GenerateRoundingErrorFreeDataset_kernel(float* const ptr, const uint32_t size, - const typename fpi_mapper::type resolution) + const uint32_t resolution) { const auto tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= size) { return; } - const float u32 = *reinterpret_cast::type*>(ptr + tid); + const float u32 = *reinterpret_cast(ptr + tid); ptr[tid] = u32 / resolution; } -template void GenerateRoundingErrorFreeDataset( const raft::resources& handle, - T* const ptr, + float* const ptr, const uint32_t n_row, const uint32_t dim, raft::random::RngState& rng, const bool diff_flag // true if compute the norm between two vectors ) { - using mapper_type = fpi_mapper; - using int_type = typename mapper_type::type; auto cuda_stream = resource::get_cuda_stream(handle); const uint32_t size = n_row * dim; const uint32_t block_size = 256; const uint32_t grid_size = (size + block_size - 1) / block_size; - const auto bitshift = (mapper_type::kBitshiftBase - std::log2(dim) - (diff_flag ? 1 : 0)) / 2; - // Skip the test when `dim` is too big for type `T` to allow rounding error-free test. - if (bitshift <= 1) { GTEST_SKIP(); } - const int_type resolution = int_type{1} << static_cast(std::floor(bitshift)); - raft::random::uniformInt( - handle, rng, reinterpret_cast(ptr), size, -resolution, resolution - 1); + const int32_t resolution = + 1 << static_cast(std::floor((24 - std::log2(dim) - (diff_flag ? 1 : 0)) / 2)); + raft::random::uniformInt( + handle, rng, reinterpret_cast(ptr), size, -resolution, resolution - 1); - GenerateRoundingErrorFreeDataset_kernel - <<>>(ptr, size, resolution); + GenerateRoundingErrorFreeDataset_kernel<<>>( + ptr, size, resolution); } } // namespace @@ -259,7 +230,6 @@ class AnnCagraTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -330,7 +300,7 @@ class AnnCagraTest : public ::testing::TestWithParam { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); search_queries.resize(ps.n_queries * ps.dim, stream_); raft::random::RngState r(1234ULL); - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same{}) { GenerateRoundingErrorFreeDataset(handle_, database.data(), ps.n_rows, ps.dim, r, true); GenerateRoundingErrorFreeDataset( handle_, search_queries.data(), ps.n_queries, ps.dim, r, true); @@ -419,7 +389,7 @@ class AnnCagraSortTest : public ::testing::TestWithParam { { database.resize(((size_t)ps.n_rows) * ps.dim, handle_.get_stream()); raft::random::RngState r(1234ULL); - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same{}) { GenerateRoundingErrorFreeDataset(handle_, database.data(), ps.n_rows, ps.dim, r, false); } else { raft::random::uniformInt( @@ -497,7 +467,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -613,7 +582,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; search_params.hashmap_mode = cagra::hash_mode::HASH; auto database_view = raft::make_device_matrix_view( @@ -688,7 +656,7 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); search_queries.resize(ps.n_queries * ps.dim, stream_); raft::random::RngState r(1234ULL); - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same{}) { GenerateRoundingErrorFreeDataset(handle_, database.data(), ps.n_rows, ps.dim, r, true); GenerateRoundingErrorFreeDataset( handle_, search_queries.data(), ps.n_queries, ps.dim, r, true); @@ -821,23 +789,6 @@ inline std::vector generate_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - inputs2 = - raft::util::itertools::product({100}, - {20000}, - {32}, - {2048}, // k - {graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, - {4096}, // itopk_size - {1}, - {raft::distance::DistanceType::L2Expanded}, - {false}, - {false}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - return inputs; } diff --git a/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu deleted file mode 100644 index fdd510bc5d..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "../ann_cagra.cuh" -#include "search_kernel_uint64_t.cuh" - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestH_I64; -TEST_P(AnnCagraTestH_I64, AnnCagra) { this->testCagra(); } - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestH_I64, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu deleted file mode 100644 index dea6ae0c23..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "../ann_cagra.cuh" - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestH_U32; -TEST_P(AnnCagraTestH_U32, AnnCagra) { this->testCagra(); } - -typedef AnnCagraSortTest AnnCagraSortTestH_U32; -TEST_P(AnnCagraSortTestH_U32, AnnCagraSort) { this->testCagraSort(); } - -typedef AnnCagraFilterTest AnnCagraFilterTestH_U32; -TEST_P(AnnCagraFilterTestH_U32, AnnCagraFilter) -{ - this->testCagraFilter(); - this->testCagraRemoved(); -} - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestH_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestH_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestH_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra