diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6107b9325a..cbae4bfb3f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -372,6 +372,102 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu diff --git a/cpp/include/raft/neighbors/dataset.hpp b/cpp/include/raft/neighbors/dataset.hpp index e7a3ba97a4..a6444775f4 100644 --- a/cpp/include/raft/neighbors/dataset.hpp +++ b/cpp/include/raft/neighbors/dataset.hpp @@ -72,7 +72,7 @@ struct strided_dataset : public dataset { return static_cast(v.stride(0) > 0 ? v.stride(0) : v.extent(1)); } /** Get the view of the data. 
*/ - [[nodiscard]] virtual auto view() const noexcept -> view_type; + [[nodiscard]] virtual auto view() const noexcept -> view_type = 0; }; template diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 0832e75633..d30f69ddcd 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -16,6 +16,7 @@ #pragma once +#include "compute_distance_vpq.cuh" #include "factory.cuh" #include "search_plan.cuh" #include "search_single_cta.cuh" @@ -77,46 +78,24 @@ inline return filter; } -/** - * @brief Search ANN using the constructed index. - * - * See the [build](#build) documentation for a usage example. - * - * @tparam T data element type - * @tparam IdxT type of database vector indices - * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need - * separate kernels for int/uint. - * - * @param[in] handle - * @param[in] params configure the search - * @param[in] idx ivf-pq constructed index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - */ - -template -void search_main(raft::resources const& res, - search_params params, - const index& index, - raft::device_matrix_view queries, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances, - CagraSampleFilterT sample_filter = CagraSampleFilterT()) +template +void search_main_core( + raft::resources const& res, + search_params params, + DatasetDescriptorT dataset_desc, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = 
CagraSampleFilterT()) { RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", - static_cast(index.dataset().extent(0)), - static_cast(index.dataset().extent(1))); + static_cast(dataset_desc.size), + static_cast(dataset_desc.dim)); /* no index object is passed to this function; log from the descriptor */ RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); + RAFT_EXPECTS(queries.extent(1) == dataset_desc.dim, "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); cudaDeviceProp deviceProp = resource::get_device_properties(res); @@ -125,12 +104,15 @@ void search_main(raft::resources const& res, } common::nvtx::range fun_scope( - "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + "cagra::search(max_queries = %u, k = %u, dim = %zu)", + params.max_queries, + topk, + dataset_desc.dim); using CagraSampleFilterT_s = typename CagraSampleFilterT_Selector::type; - std::unique_ptr> plan = - factory::create( - res, params, index.dim(), index.graph_degree(), topk); + std::unique_ptr> plan = + factory::create( + res, params, dataset_desc.dim, graph.extent(1), topk); plan->check(topk); @@ -140,30 +122,22 @@ void search_main(raft::resources const& res, for (unsigned qid = 0; qid < queries.extent(0); qid += max_queries) { const uint32_t n_queries = std::min(max_queries, queries.extent(0) - qid); - internal_IdxT* _topk_indices_ptr = - reinterpret_cast(neighbors.data_handle()) + (topk * qid); - DistanceT* _topk_distances_ptr = distances.data_handle() + (topk * qid); + auto _topk_indices_ptr = + reinterpret_cast(neighbors.data_handle()) + + (topk * qid); + auto _topk_distances_ptr = distances.data_handle() + (topk * qid); // todo(tfeher): one could keep distances optional and pass nullptr - const T* _query_ptr = queries.data_handle() + (query_dim * qid); - const internal_IdxT* _seed_ptr = + const auto* _query_ptr = queries.data_handle() + 
(query_dim * qid); + const auto* _seed_ptr = plan->num_seeds > 0 - ? reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * qid) + ? reinterpret_cast(plan->dev_seed.data()) + + (plan->num_seeds * qid) : nullptr; uint32_t* _num_executed_iterations = nullptr; - auto dataset_internal = - make_device_strided_matrix_view(index.dataset().data_handle(), - index.dataset().extent(0), - index.dataset().extent(1), - index.dataset().stride(0)); - auto graph_internal = raft::make_device_matrix_view( - reinterpret_cast(index.graph().data_handle()), - index.graph().extent(0), - index.graph().extent(1)); - (*plan)(res, - dataset_internal, - graph_internal, + dataset_desc, + graph, _topk_indices_ptr, _topk_distances_ptr, _query_ptr, @@ -173,6 +147,146 @@ void search_main(raft::resources const& res, topk, set_offset(sample_filter, qid)); } +} + +template +void launch_vpq_search_main_core( + raft::resources const& res, + const vpq_dataset* vpq_dset, + search_params params, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter) +{ + RAFT_EXPECTS(vpq_dset->pq_bits() == 8, "Only pq_bits = 8 is supported for now"); + RAFT_EXPECTS(vpq_dset->pq_len() == 2 || vpq_dset->pq_len() == 4, "Only pq_len 2 or 4 is supported for now"); /* must agree with the pq_len dispatch below, otherwise the pq_len == 4 branch is unreachable */ + RAFT_EXPECTS(vpq_dset->dim() % vpq_dset->pq_dim() == 0, + "dim must be a multiple of pq_dim at the moment"); + + const float vq_scale = 1.0f; + const float pq_scale = 1.0f; + + if (vpq_dset->pq_bits() == 8) { + if (vpq_dset->pq_len() == 2) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, queries, neighbors, distances, sample_filter); + } 
else if (vpq_dset->pq_len() == 4) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, queries, neighbors, distances, sample_filter); + } else { + RAFT_FAIL("Subspace dimension must be 2 or 4"); + } + } else { + RAFT_FAIL("Only 8-bit PQ is supported now"); + } +} + +/** + * @brief Search ANN using the constructed index. + * + * See the [build](#build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of database vector indices + * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need + * separate kernels for int/uint. + * + * @param[in] res raft resources handle + * @param[in] params configure the search + * @param[in] index CAGRA constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +template +void search_main(raft::resources const& res, + search_params params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) +{ + const auto& graph = index.graph(); + auto graph_internal = raft::make_device_matrix_view( + reinterpret_cast(graph.data_handle()), graph.extent(0), graph.extent(1)); + + // n_rows has the same type as the dataset index (the array extents type) + using ds_idx_type = decltype(index.data().n_rows()); + // Dispatch search parameters based on the 
dataset kind. + if (auto* strided_dset = dynamic_cast*>(&index.data()); + strided_dset != nullptr) { + // Set TEAM_SIZE and DATASET_BLOCK_SIZE to zero tentatively since these parameters cannot be + // determined here. They are set just before kernel launch. + using dataset_desc_t = standard_dataset_descriptor_t; + // Search using a plain (strided) row-major dataset + const dataset_desc_t dataset_desc(strided_dset->view().data_handle(), + strided_dset->n_rows(), + strided_dset->dim(), + strided_dset->stride()); + + search_main_core( + res, params, dataset_desc, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); + vpq_dset != nullptr) { + // Search using a compressed dataset + RAFT_FAIL("FP32 VPQ dataset support is coming soon"); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); + vpq_dset != nullptr) { + launch_vpq_search_main_core( + res, vpq_dset, params, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* empty_dset = dynamic_cast*>(&index.data()); + empty_dset != nullptr) { + // Forgot to add a dataset. + RAFT_FAIL( + "Attempted to search without a dataset. Please call index.update_dataset(...) first."); + } else { + // This is a logic error. 
+ RAFT_FAIL("Unrecognized dataset format"); + } static_assert(std::is_same_v, "only float distances are supported at the moment"); diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 3732dcf3fe..49e14be73d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -20,6 +20,7 @@ #include "utils.hpp" #include +#include #include @@ -36,152 +37,16 @@ _RAFT_DEVICE constexpr unsigned get_vlen() return utils::size_of() / utils::size_of(); } -template -struct data_load_t { - union { - LOAD_T load; - DATA_T data[VLEN]; - }; -}; - -template -struct distance_op; -template -struct distance_op { - const float* const query_buffer; - __device__ distance_op(const float* const query_buffer) : query_buffer(query_buffer) {} - - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; - - DISTANCE_T norm2 = 0; - if (valid) { - for (uint32_t elem_offset = 0; elem_offset < dataset_dim; elem_offset += DATASET_BLOCK_DIM) { -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; -#pragma unroll - for (uint32_t v = 0; v < vlen; v++) { - const uint32_t kv = k + v; - // if (kv >= dataset_dim) break; - DISTANCE_T diff = query_buffer[device::swizzling(kv)]; - diff -= 
spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } -}; -template -struct distance_op { - static constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - float query_frags[N_FRAGS]; - - __device__ distance_op(const float* const query_buffer) - { - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - const std::uint32_t lane_id = threadIdx.x % TEAM_SIZE; - // Pre-load query vectors into registers when register usage is not too large. -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - // if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - const unsigned kv = k + v; - const unsigned ev = (vlen * e) + v; - query_frags[ev] = query_buffer[device::swizzling(kv)]; - } - } - } - - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; - - DISTANCE_T norm2 = 0; - if (valid) { -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - DISTANCE_T diff; - const unsigned ev = (vlen * e) + v; - diff = query_frags[ev]; - diff -= 
spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } -}; - template _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] - const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + const DATASET_DESCRIPTOR_T& dataset_desc, const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, @@ -195,9 +60,6 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( uint32_t max_i = num_pickup; if (max_i % (32 / TEAM_SIZE)) { max_i += (32 / TEAM_SIZE) - (max_i % (32 / TEAM_SIZE)); } - distance_op dist_op( - query_buffer); - for (uint32_t i = threadIdx.x / TEAM_SIZE; i < max_i; i += blockDim.x / TEAM_SIZE) { const bool valid_i = (i < num_pickup); @@ -212,11 +74,12 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( if (seed_ptr && (gid < num_seeds)) { seed_index = seed_ptr[gid]; } else { - seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; + seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_desc.size; } } - const auto norm2 = dist_op(dataset_ptr + dataset_ld * seed_index, dataset_dim, valid_i); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, valid_i); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -240,27 +103,25 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( template -_RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_indices_ptr, - DISTANCE_T* const result_child_distances_ptr, - // query - const float* 
const query_buffer, - // [dataset_dim, dataset_size] - const DATA_T* const dataset_ptr, - const std::size_t dataset_dim, - const std::size_t dataset_ld, - // [knn_k, dataset_size] - const INDEX_T* const knn_graph, - const std::uint32_t knn_k, - // hashmap - INDEX_T* const visited_hashmap_ptr, - const std::uint32_t hash_bitlen, - const INDEX_T* const parent_indices, - const INDEX_T* const internal_topk_list, - const std::uint32_t search_width) +_RAFT_DEVICE void compute_distance_to_child_nodes( + INDEX_T* const result_child_indices_ptr, + DISTANCE_T* const result_child_distances_ptr, + // query + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + // [dataset_dim, dataset_size] + const DATASET_DESCRIPTOR_T& dataset_desc, + // [knn_k, dataset_size] + const INDEX_T* const knn_graph, + const std::uint32_t knn_k, + // hashmap + INDEX_T* const visited_hashmap_ptr, + const std::uint32_t hash_bitlen, + const INDEX_T* const parent_indices, + const INDEX_T* const internal_topk_list, + const std::uint32_t search_width) { constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; const INDEX_T invalid_index = utils::get_max_value(); @@ -281,16 +142,6 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } result_child_indices_ptr[i] = child_id; } - - // [Notice] - // Loading the query vector here from shared memory into registers reduces - // shared memory trafiic. However, register usage increase. The - // MAX_N_FRAGS below is used as the threshold to enable or disable this, - // but the appropriate value should be discussed. 
- constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - constexpr bool use_fragment = N_FRAGS <= MAX_N_FRAGS; - distance_op dist_op( - query_buffer); __syncthreads(); // Compute the distance to child nodes @@ -302,8 +153,8 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T child_id = invalid_index; if (valid_i) { child_id = result_child_indices_ptr[i]; } - DISTANCE_T norm2 = - dist_op(dataset_ptr + child_id * dataset_ld, dataset_dim, child_id != invalid_index); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, child_id != invalid_index); // Store the distance const unsigned lane_id = threadIdx.x % TEAM_SIZE; @@ -318,4 +169,101 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } } // namespace device + +template +struct dataset_descriptor_base_t { + using INDEX_T = INDEX_T_; + using QUERY_T = QUERY_T_; + using DISTANCE_T = DISTANCE_T_; + + const INDEX_T size; + const std::uint32_t dim; + + dataset_descriptor_base_t(const INDEX_T size, const std::uint32_t dim) : size(size), dim(dim) {} +}; + +template +struct standard_dataset_descriptor_t + : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + + const DATA_T* const ptr; + const std::size_t ld; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + standard_dataset_descriptor_t(const DATA_T* const ptr, + const std::size_t size, + const std::uint32_t dim, + const std::size_t ld) + : dataset_descriptor_base_t(size, dim), ptr(ptr), ld(ld) + { + } + + static const std::uint32_t smem_buffer_size_in_byte = 0; + __device__ void set_smem_ptr(void* const){}; + + template + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + for (unsigned i = 
threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { + unsigned j = device::swizzling(i); + if (i < dim) { + smem_query_ptr[j] = spatial::knn::detail::utils::mapping{}(dmem_query_ptr[i]); + } else { + smem_query_ptr[j] = 0.0; + } + } + } + + template + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + const INDEX_T dataset_i, + const bool valid) const + { + const auto dataset_ptr = ptr + dataset_i * ld; + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + constexpr unsigned vlen = device::get_vlen(); + // #include (DATASET_BLOCK_DIM, TEAM_SIZE * vlen); + raft::TxN_t dl_buff[reg_nelem]; + + DISTANCE_T norm2 = 0; + if (valid) { + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; + dl_buff[e].load(dataset_ptr, k); + } +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; +#pragma unroll + for (uint32_t v = 0; v < vlen; v++) { + const uint32_t kv = k + v; + // Note this loop can go above the dataset_dim for padded arrays. This is not a problem + // because: + // - Above the last element (dataset_dim-1), the query array is filled with zeros. + // - The data buffer has to be also padded with zeros. 
+ DISTANCE_T diff = query_ptr[device::swizzling(kv)]; + diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].val.data[v]); + norm2 += diff * diff; + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); + } + return norm2; + } +}; + } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh new file mode 100644 index 0000000000..0204addba7 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "compute_distance.hpp" + +#include + +namespace raft::neighbors::cagra::detail { +template +struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using CODE_BOOK_T = CODE_BOOK_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + + const std::uint8_t* encoded_dataset_ptr; + const std::uint32_t encoded_dataset_dim; + const std::uint32_t n_subspace; + const CODE_BOOK_T* vq_code_book_ptr; + const float vq_scale; + const CODE_BOOK_T* pq_code_book_ptr; + const float pq_scale; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + // Set on device + CODE_BOOK_T* smem_pq_code_book_ptr; + static const std::uint32_t smem_buffer_size_in_byte = + (1 << PQ_BITS) * PQ_LEN * utils::size_of(); + + __device__ void set_smem_ptr(void* const smem_ptr) + { + smem_pq_code_book_ptr = reinterpret_cast(smem_ptr); + + // Copy PQ table + if constexpr (std::is_same::value) { + for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x * 2) { + half2 buf2; + buf2.x = pq_code_book_ptr[i]; + buf2.y = pq_code_book_ptr[i + 1]; + (reinterpret_cast(smem_pq_code_book_ptr + i))[0] = buf2; + } + } else { + for (unsigned i = threadIdx.x; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x) { + // TODO: vectorize + smem_pq_code_book_ptr[i] = pq_code_book_ptr[i]; + } + } + } + + cagra_q_dataset_descriptor_t(const std::uint8_t* encoded_dataset_ptr, + const std::uint32_t encoded_dataset_dim, + const std::uint32_t n_subspace, + const CODE_BOOK_T* const vq_code_book_ptr, + const float vq_scale, + const CODE_BOOK_T* const pq_code_book_ptr, + const float pq_scale, + const std::size_t size, + const std::uint32_t dim) + : dataset_descriptor_base_t(size, dim), + encoded_dataset_ptr(encoded_dataset_ptr), + encoded_dataset_dim(encoded_dataset_dim), + n_subspace(n_subspace), + vq_code_book_ptr(vq_code_book_ptr), + vq_scale(vq_scale), + 
pq_code_book_ptr(pq_code_book_ptr), + pq_scale(pq_scale) + { + } + + template + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + constexpr spatial::knn::detail::utils::mapping mapping{}; + for (unsigned i = threadIdx.x * 2; i < dim; i += blockDim.x * 2) { + half2 buf2{0, 0}; + if (i < dim) { buf2.x = mapping(dmem_query_ptr[i]); } + if (i + 1 < dim) { buf2.y = mapping(dmem_query_ptr[i + 1]); } + if ((PQ_BITS == 8) && (PQ_LEN % 2 == 0)) { + // Use swizzling in the condition to reduce bank conflicts in shared + // memory, which are likely to occur when pq_code_book_dim is large. + ((half2*)smem_query_ptr)[device::swizzling(i / 2)] = + buf2; + } else { + (reinterpret_cast(smem_query_ptr + i))[0] = buf2; + } + } + } + + template + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + const INDEX_T node_id, + const bool valid) const + { + float norm = 0; + if (valid) { + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + const uint32_t vq_code = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id))); + if (PQ_BITS == 8) { + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = + raft::div_rounding_up_unsafe(DATASET_BLOCK_DIM / PQ_LEN, TEAM_SIZE * vlen); + // Loading PQ codes + uint32_t pq_codes[nelem]; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device memory) + pq_codes[e] = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id) + + 4 + k)); + } + // + if constexpr ((std::is_same::value) && (PQ_LEN % 2 == 0)) { + // **** Use half2 for distance computation **** + half2 
norm2{0, 0}; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_LEN]; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m += 1) { + const uint32_t d = (vlen * m) + (PQ_LEN * k); + if (d >= dim) break; + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_LEN * (v + k) >= dim) break; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m += 2) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); + // Loading query vector in smem + half2 diff2 = (reinterpret_cast( + query_ptr))[device::swizzling(d / 2)]; + // Loading PQ code book in smem + diff2 -= *(reinterpret_cast( + smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); + diff2 -= vq_vals[d1 / vlen].val.data[(d1 % vlen) / 2]; + norm2 += diff2 * diff2; + } + pq_code >>= 8; + } + } + norm += static_cast(norm2.x + norm2.y); + } else { + // **** Use float for distance computation **** +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_LEN]; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d = (vlen * m) + (PQ_LEN * k); + if (d >= dim) break; + // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device + // memory) + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_LEN * (v + k) >= dim) break; + raft::TxN_t pq_vals; + 
pq_vals.load( + reinterpret_cast(smem_pq_code_book_ptr + PQ_LEN * (pq_code & 0xff)), + 0); // (from L1$ or smem) +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); + // if (d >= dataset_dim) break; + DISTANCE_T diff = query_ptr[d]; // (from smem) + diff -= pq_scale * static_cast(pq_vals.data[m]); + diff -= vq_scale * static_cast(vq_vals[d1 / vlen].val.data[d1 % vlen]); + norm += diff * diff; + } + pq_code >>= 8; + } + } + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm += __shfl_xor_sync(0xffffffff, norm, offset); + } + return norm; + } +}; + +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp index cd7469b55e..d4d69e6a67 100644 --- a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp @@ -42,13 +42,17 @@ _RAFT_HOST_DEVICE inline uint64_t xorshift64(uint64_t u) return u * 0x2545F4914F6CDD1DULL; } -template +template _RAFT_DEVICE inline T swizzling(T x) { // Address swizzling reduces bank conflicts in shared memory, but increases // the amount of operation instead. 
// return x; - return x ^ (x >> 5); // "x" must be less than 1024 + if constexpr (X_MAX <= 1024) { + return (x) ^ ((x) >> 5); + } else { + return (x) ^ (((x) >> 5) & 0x1f); + } } } // namespace device diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh index 0aee912e25..4944b57c46 100644 --- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh @@ -25,16 +25,18 @@ namespace raft::neighbors::cagra::detail { -template class factory { + using T = typename DATASET_DESCRIPTOR_T::DATA_T; + using IdxT = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DistanceT = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + public: /** * Create a search structure for dataset with dim features. */ - static std::unique_ptr> create( + static std::unique_ptr> create( raft::resources const& res, search_params const& params, int64_t dim, @@ -63,28 +65,28 @@ class factory { break; default: THROW("Incorrect dataset_block_dim (%lu)\n", plan.dataset_block_dim); } - return std::unique_ptr>(); + return std::unique_ptr>(); } private: template - static std::unique_ptr> dispatch_kernel( - raft::resources const& res, search_plan_impl_base& plan) + static std::unique_ptr> + dispatch_kernel(raft::resources const& res, search_plan_impl_base& plan) { if (plan.algo == search_algo::SINGLE_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new single_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else if (plan.algo == search_algo::MULTI_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new multi_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else { - return std::unique_ptr>( + return std::unique_ptr>( new multi_kernel_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } } diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 1fcd159959..8192b1ae51 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -45,44 +45,46 @@ namespace multi_cta_search { template -struct search : public search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; +struct search : public search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; - using search_plan_impl::hash_bitlen; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using 
search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; + using search_plan_impl::hash_bitlen; - using search_plan_impl::smem_size; + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_cta_per_query; rmm::device_uvector intermediate_indices; @@ -95,8 +97,7 @@ struct search : public search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), intermediate_indices(0, resource::get_cuda_stream(res)), intermediate_distances(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)) @@ -120,9 +121,11 @@ struct search : public search_plan_impl(dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + smem_size = sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + - sizeof(uint32_t) * search_width + sizeof(uint32_t); + sizeof(uint32_t) * search_width + sizeof(uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; RAFT_LOG_DEBUG("# smem_size: %u", smem_size); // @@ -191,22 +194,25 @@ struct search : public search_plan_impl dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* 
const num_executed_iterations, // [num_queries,] - uint32_t topk, - SAMPLE_FILTER_T sample_filter) + void operator()( + raft::resources const& res, + // raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view + graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, intermediate_indices.data(), intermediate_distances.data(), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 7a5ad17460..50f9e69593 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include // none_cagra_sample_filter #include // RAFT_EXPLICIT @@ -27,63 +28,66 @@ namespace multi_cta_search { template -void select_and_run(raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, - DISTANCE_T* const topk_distances_ptr, - const DATA_T* const queries_ptr, - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, - uint32_t* const num_executed_iterations, - uint32_t topk, - uint32_t block_size, - uint32_t result_buffer_size, - uint32_t smem_size, - int64_t hash_bitlen, - INDEX_T* hashmap_ptr, - uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t 
rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - cudaStream_t stream) RAFT_EXPLICIT; +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, + uint32_t* const num_executed_iterations, + uint32_t topk, + uint32_t block_size, + uint32_t result_buffer_size, + uint32_t smem_size, + int64_t hash_bitlen, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, + uint32_t num_cta_per_query, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SAMPLE_FILTER_T sample_filter, + cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ 
+#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_kernel_selection( @@ -120,5 +124,292 @@ instantiate_kernel_selection( 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection + +#define instantiate_q_kernel_selection(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + 
INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_kernel_selection( + 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + 
int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_kernel_selection } // namespace multi_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 30f56780d6..48c22d9d14 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -123,29 +123,26 @@ __device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] // // multiple CTAs per single query // -template __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( - INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] - DISTANCE_T* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const size_t dataset_dim, - const size_t dataset_size, - const size_t dataset_ld, - const DATA_T* 
const queries_ptr, // [num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, const uint32_t itopk_size, const uint32_t search_width, @@ -154,6 +151,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( uint32_t* const num_executed_iterations, /* stats */ SAMPLE_FILTER_T sample_filter) { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto num_queries = gridDim.y; const auto query_id = blockIdx.y; const auto num_cta_per_query = gridDim.x; @@ -188,14 +190,20 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( assert(result_buffer_size_32 <= MAX_ELEMENTS); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto query_buffer = reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + auto query_buffer = 
reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto parent_indices_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); - auto terminate_flag = reinterpret_cast(parent_indices_buffer + search_width); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_indices_buffer + search_width); + auto terminate_flag = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); #if 0 /* debug */ @@ -204,15 +212,10 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( result_distances_buffer[i] = utils::get_max_value(); } #endif - const DATA_T* const query_ptr = queries_ptr + (dataset_dim * query_id); - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + const DATA_T* const query_ptr = queries_ptr + (dataset_desc.dim * query_id); + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; } INDEX_T* const local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * query_id); @@ -224,23 +227,19 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( const INDEX_T* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; uint32_t block_id = cta_id + (num_cta_per_query * query_id); uint32_t num_blocks = num_cta_per_query * num_queries; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - block_id, - num_blocks); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -272,13 +271,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( _CLK_START(); // constexpr unsigned max_n_frags = 16; constexpr unsigned max_n_frags = 0; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + itopk_size, result_distances_buffer + itopk_size, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -398,53 +395,35 @@ void set_value_batch(T* const dev_ptr, <<>>(dev_ptr, ld, val, count, batch_size); } -template struct search_kernel_config { // Search kernel function type. Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. The // second to fourth value parameters will be selected by the choose_* functions below. 
using kernel_t = decltype(&search_kernel); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { if (result_buffer_size <= 64) { - return search_kernel; + return search_kernel; } else if (result_buffer_size <= 128) { return search_kernel; } else if (result_buffer_size <= 256) { return search_kernel; } THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); @@ -453,26 +432,24 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, // multi_cta_search (params struct) uint32_t block_size, // uint32_t result_buffer_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, uint32_t num_cta_per_query, uint32_t num_random_samplings, uint64_t rand_xor_mask, @@ -485,19 +462,20 @@ void select_and_run( // raft::resources const& res, cudaStream_t stream) { auto kernel = - search_kernel_config::choose_buffer_size(result_buffer_size, block_size); - - RAFT_CUDA_TRY( - 
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + search_kernel_config:: + choose_buffer_size(result_buffer_size, block_size); + + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); // Initialize hash table const uint32_t hash_size = hashmap::get_size(hash_bitlen); - set_value_batch( - hashmap_ptr, hash_size, utils::get_max_value(), hash_size, num_queries, stream); + set_value_batch(hashmap_ptr, + hash_size, + utils::get_max_value(), + hash_size, + num_queries, + stream); dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); @@ -508,10 +486,7 @@ void select_and_run( // raft::resources const& res, smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index e4a30675bb..10788da432 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -16,6 +16,7 @@ #pragma once #include "compute_distance.hpp" +#include "compute_distance_vpq.cuh" #include "device_common.hpp" #include "hashmap.hpp" #include "search_plan.cuh" @@ -86,27 +87,25 @@ void get_value(T* const host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre } // MAX_DATASET_DIM : must equal to or greater than dataset_dim -template -RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_pickup, - const unsigned num_distilation, - 
const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::uint32_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const std::uint32_t hash_bitlen) +template +RAFT_KERNEL random_pickup_kernel( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::uint32_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen) { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + const auto ldb = hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) / TEAM_SIZE; const uint32_t query_id = blockIdx.y; @@ -114,19 +113,17 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s // Load a query extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { - 
query_buffer[j] = - spatial::knn::detail::utils::mapping{}((queries_ptr + query_id * dataset_dim)[i]); + if (i < dataset_desc.dim) { + query_buffer[j] = spatial::knn::detail::utils::mapping{}( + (queries_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } } __syncthreads(); - device::distance_op dist_op( - query_buffer); INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -136,10 +133,12 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s seed_index = seed_ptr[global_team_index + (num_seeds * query_id)]; } else { // Chose a seed node randomly - seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; + seed_index = + device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_desc.size; } - const auto norm2 = dist_op(dataset_ptr + (dataset_ld * seed_index), dataset_dim, true); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, true); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -161,28 +160,22 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s } // MAX_DATASET_DIM : must be equal to or greater than dataset_dim -template -void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_queries, - const std::size_t num_pickup, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::size_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const 
std::uint32_t hash_bitlen, - cudaStream_t const cuda_stream = 0) +template +void random_pickup( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_queries, + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::size_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen, + cudaStream_t const cuda_stream = 0) { const auto block_size = 256u; const auto num_teams_per_threadblock = block_size / TEAM_SIZE; @@ -190,14 +183,11 @@ void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_d num_queries); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; const auto smem_size = query_smem_buffer_length * sizeof(float); - random_pickup_kernel - <<>>(dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + random_pickup_kernel + <<>>(dataset_desc, queries_ptr, num_pickup, num_distilation, @@ -313,30 +303,33 @@ void pickup_next_parents(INDEX_T* const parent_candidates_ptr, // [num_queries, template RAFT_KERNEL compute_distance_to_child_nodes_kernel( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, 
search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const std::uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // [num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter) { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; @@ -344,12 +337,12 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, 
DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { + if (i < dataset_desc.dim) { query_buffer[j] = - spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_dim)[i]); + spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } @@ -357,9 +350,6 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } - device::distance_op dist_op( - query_buffer); - const std::size_t parent_list_index = parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; @@ -381,8 +371,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( const auto compute_distance_flag = hashmap::insert( visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - const auto norm2 = - dist_op(dataset_ptr + (dataset_ld * child_id), dataset_dim, compute_distance_flag); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, compute_distance_flag); if (compute_distance_flag) { if (threadIdx.x % TEAM_SIZE == 0) { @@ -407,29 +397,29 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( template + class SAMPLE_FILTER_T, + class DATASET_DESCRIPTOR_T> void compute_distance_to_child_nodes( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, 
search_width] const std::size_t lds, const uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] const std::uint32_t num_queries, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // [num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { @@ -439,20 +429,21 @@ void compute_distance_to_child_nodes( num_queries); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - const auto smem_size = query_smem_buffer_length * sizeof(float); + const auto smem_size = + query_smem_buffer_length * sizeof(float) + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; - compute_distance_to_child_nodes_kernel + compute_distance_to_child_nodes_kernel <<>>(parent_node_list, parent_candidates_ptr, parent_distance_ptr, lds, search_width, - 
dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + dataset_desc, neighbor_graph_ptr, graph_degree, query_ptr, @@ -609,47 +600,51 @@ void set_value_batch(T* const dev_ptr, // |<--- result_buffer_size --->| // Double buffer (B) template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + static_assert(std::is_same_v, "Only float is supported as resulting distance"); + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using 
search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; size_t result_buffer_allocation_size; - rmm::device_uvector result_indices; // results_indices_buffer - rmm::device_uvector result_distances; // result_distances_buffer + rmm::device_uvector result_indices; // results_indices_buffer + rmm::device_uvector result_distances; // result_distances_buffer rmm::device_uvector parent_node_list; rmm::device_uvector topk_hint; rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; @@ -666,8 +661,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), result_indices(0, resource::get_cuda_stream(res)), result_distances(0, resource::get_cuda_stream(res)), parent_node_list(0, resource::get_cuda_stream(res)), @@ -800,7 +794,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] @@ 
-828,24 +822,20 @@ struct search : search_plan_impl { } // Choose initial entry point candidates at random - random_pickup( - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), - queries_ptr, - num_queries, - result_buffer_size, - num_random_samplings, - rand_xor_mask, - dev_seed_ptr, - num_seeds, - result_indices.data(), - result_distances.data(), - result_buffer_allocation_size, - hashmap.data(), - hash_bitlen, - stream); + random_pickup(dataset_desc, + queries_ptr, + num_queries, + result_buffer_size, + num_random_samplings, + rand_xor_mask, + dev_seed_ptr, + num_seeds, + result_indices.data(), + result_distances.data(), + result_buffer_allocation_size, + hashmap.data(), + hash_bitlen, + stream); unsigned iter = 0; while (1) { @@ -897,16 +887,13 @@ struct search : search_plan_impl { } // Compute distance to child nodes that are adjacent to the parent node - compute_distance_to_child_nodes( + compute_distance_to_child_nodes( parent_node_list.data(), result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, result_buffer_allocation_size, search_width, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, graph.data_handle(), graph.extent(1), queries_ptr, @@ -993,5 +980,68 @@ struct search : search_plan_impl { } }; +template +struct search, + SAMPLE_FILTER_T> + : public search_plan_impl, + SAMPLE_FILTER_T> { + using DATASET_DESCRIPTOR_T = cagra_q_dataset_descriptor_t; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + search(raft::resources const& res, + search_params params, + int64_t dim, + int64_t graph_degree, + uint32_t topk) + : search_plan_impl(res, params, dim, graph_degree, topk) + { + THROW("The multi-kernel mode does not support VPQ"); + } + + void set_params(raft::resources const& res) {} + + void operator()(raft::resources 
const& res, + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) + { + } +}; + } // namespace multi_kernel_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 11ef7e5211..be5ac0554f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -71,8 +71,12 @@ struct search_plan_impl_base : public search_params { } }; -template +template struct search_plan_impl : public search_plan_impl_base { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + int64_t hash_bitlen; size_t small_hash_bitlen; @@ -111,7 +115,7 @@ struct search_plan_impl : public search_plan_impl_base { virtual ~search_plan_impl() {} virtual void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index f1e74ee7a5..4430b929fb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -48,43 +48,45 @@ namespace single_cta_search { template -struct 
search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; 
+ + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_itopk_candidates; @@ -93,8 +95,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk) + : search_plan_impl(res, params, dim, graph_degree, topk) { set_params(res); } @@ -128,7 +129,8 @@ struct search : search_plan_impl { sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * search_width + - sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t); + sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; smem_size = base_smem_size; if (num_itopk_candidates > 256) { // Tentatively calculate the required share memory size when radix @@ -205,7 +207,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] @@ -217,8 +219,8 @@ struct search : search_plan_impl { SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, result_indices_ptr, result_distances_ptr, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index fef060ffee..a836334667 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -27,25 +27,23 @@ namespace single_cta_search { template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -60,34 +58,38 @@ void select_and_run( // raft::resources const& res, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, 
\ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_single_cta_select_and_run( @@ -125,5 +127,473 @@ instantiate_single_cta_select_and_run( #undef instantiate_single_cta_select_and_run +#define instantiate_q_single_cta_select_and_run(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + 
INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 
8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, int64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + 
float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 
1024, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_single_cta_select_and_run + } // namespace single_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 652115928b..a697f9512c 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -456,42 +456,44 @@ __device__ inline void set_value_device(T* const ptr, const T fill, const std::u } // One query one thread block -template -__launch_bounds__(1024, 1) RAFT_KERNEL - search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] - DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] - const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, // stride of dataset - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] - const std::uint32_t graph_degree, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const 
INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, // [num_queries] - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - SAMPLE_FILTER_T sample_filter) +__launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, top_k] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] + const std::uint32_t top_k, + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] + const std::uint32_t graph_degree, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, // [num_queries] + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + SAMPLE_FILTER_T sample_filter) { - using LOAD_T = device::LOAD_128BIT_T; + using LOAD_T = device::LOAD_128BIT_T; + + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = 
typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto query_id = blockIdx.y; #ifdef _CLK_BREAKDOWN @@ -525,30 +527,31 @@ __launch_bounds__(1024, 1) RAFT_KERNEL const auto small_hash_size = hashmap::get_size(small_hash_bitlen); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto query_buffer = reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + auto query_buffer = reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto visited_hash_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); auto parent_list_buffer = reinterpret_cast(visited_hash_buffer + small_hash_size); - auto topk_ws = reinterpret_cast(parent_list_buffer + search_width); - auto terminate_flag = reinterpret_cast(topk_ws + 3); - auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_list_buffer + search_width); + auto topk_ws = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + auto terminate_flag = reinterpret_cast(topk_ws + 3); + auto smem_work_ptr = reinterpret_cast(terminate_flag + 1); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); // A flag for filtering. 
auto filter_flag = terminate_flag; - const DATA_T* const query_ptr = queries_ptr + query_id * dataset_dim; - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + const DATA_T* const query_ptr = queries_ptr + query_id * dataset_desc.dim; + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; topk_ws[0] = ~0u; @@ -568,21 +571,17 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute distance to randomly selecting nodes _CLK_START(); const INDEX_T* const local_seed_ptr = seed_ptr ? seed_ptr + (num_seeds * query_id) : nullptr; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -667,7 +666,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL nullptr, topk_ws, true, - reinterpret_cast(smem_working_ptr)); + reinterpret_cast(smem_work_ptr)); _CLK_REC(clk_topk); // reset small-hash table @@ -688,7 +687,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL parent_list_buffer, result_indices_buffer, internal_topk, - dataset_size, + dataset_desc.size, search_width); _CLK_REC(clk_pickup_parents); } @@ -708,13 +707,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute the norms between child nodes and query node _CLK_START(); constexpr unsigned 
max_n_frags = 8; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + internal_topk, result_distances_buffer + internal_topk, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -814,50 +811,53 @@ __launch_bounds__(1024, 1) RAFT_KERNEL #endif } -template struct search_kernel_config { - using kernel_t = - decltype(&search_kernel); + using kernel_t = decltype(&search_kernel); template static auto choose_search_kernel(unsigned itopk_size) -> kernel_t { if (itopk_size <= 64) { - return search_kernel; + return search_kernel; } else if (itopk_size <= 128) { return search_kernel; } else if (itopk_size <= 256) { return search_kernel; } else if (itopk_size <= 512) { return search_kernel; } THROW("No kernel for parametels itopk_size %u, max_candidates %u", itopk_size, MAX_CANDIDATES); @@ -878,9 +878,21 @@ struct search_kernel_config { // Radix-based topk is used constexpr unsigned max_candidates = 32; // to avoid build failure if (itopk_size <= 256) { - return search_kernel; + return search_kernel; } else if (itopk_size <= 512) { - return search_kernel; + return search_kernel; } } THROW("No kernel for parametels itopk_size %u, num_itopk_candidates %u", @@ -891,25 +903,23 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const 
queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, // uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -923,16 +933,11 @@ void select_and_run( // raft::resources const& res, cudaStream_t stream) { auto kernel = - search_kernel_config::choose_itopk_and_mx_candidates(itopk_size, - num_itopk_candidates, - block_size); - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + search_kernel_config:: + choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( @@ -940,10 +945,7 @@ void select_and_run( // raft::resources const& res, kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/utils.hpp b/cpp/include/raft/neighbors/detail/cagra/utils.hpp index 7e403abe91..265cbfdceb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/utils.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/utils.hpp @@ -111,6 +111,11 @@ _RAFT_HOST_DEVICE constexpr unsigned size_of() { return 2; } +template <> +_RAFT_HOST_DEVICE constexpr unsigned size_of() +{ + return 4; 
+} // max values for data types template diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 69d2bd29b2..f5c8c73bb9 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -54,6 +54,7 @@ template distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); diff --git a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh index f6cd2a1ceb..f1321ba343 100644 --- a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh +++ b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh @@ -81,7 +81,7 @@ auto fill_missing_params_heuristics(const vpq_params& params, const DatasetT& da vpq_params r = params; double n_rows = dataset.extent(0); size_t dim = dataset.extent(1); - if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{4}); } + if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{2}); } if (r.pq_bits == 0) { r.pq_bits = 8; } if (r.vq_n_centers == 0) { r.vq_n_centers = raft::round_up_safe(std::sqrt(n_rows), 8); } if (r.vq_kmeans_trainset_fraction == 0) { diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index fc57494b22..7948a0e4f2 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -52,7 +52,7 @@ void refine(raft::resources const& handle, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ 
raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -60,8 +60,9 @@ void refine(raft::resources const& handle, raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ + raft::distance::DistanceType metric); + +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::host_matrix_view dataset, \ @@ -71,8 +72,14 @@ void refine(raft::resources const& handle, raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, int8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, uint8_t, float, int64_t); + +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, int8_t, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, uint8_t, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py new file mode 100644 index 0000000000..e827c06be5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +""" + +trailer = """ +} // namespace raft::neighbors::cagra::detail::multi_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +pq_bits = [8] +subspace_dims = [2, 4] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# mxelem = [64, 128, 256] +load_types = ["uint4"] +code_book_types = ["half"] +search_types = dict( + float_uint32=( + "float", + "uint32_t", + "float", + ), # data_t, vec_idx_t, distance_t + half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", "float"), + half_uint64=("half", "uint64_t", "float"), +) +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_multi_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..0bd386144c --- 
/dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..cd891b8e97 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..66e8357498 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..eb84983f9e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..c66f8a0ae3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..2a1783944c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..9fa74f1134 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..8fc91b5a10 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..4e68c00525 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..5fe526ae47 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..64c89a880a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..c3e2427f57 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..0a8826df1c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..8019bec3e3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..1a2a364037 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..2f661538e6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..aec486769f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..03f27085d8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..119d1f2921 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..666c676e87 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..e53b456a54 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..2aee739141 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..daa442b514 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a19346d19b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..1c1d5381c9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..b7402a3c38 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..f493b83bee --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..8efcbe0650 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..cb770f44ba --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..0fd8ab809c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..50cf198883 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..1548ed831e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c60ea7c87d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..4a68e1e43c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..df9fabd6a5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..77075b0a44 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..374af8b56b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..ddb80458fd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..14e5c5d3dc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..3c1776760a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..e5a0a8882c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..cee80390e8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..88678bf4ff --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..baa7ee358a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..5c44f052f2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..127a065fb5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..fcf6985f97 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..f361e771b5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py new file mode 100644 index 0000000000..418d528a82 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py @@ -0,0 +1,89 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +""" + +trailer = """ +} // namespace raft::neighbors::cagra::detail::single_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# itopk_candidates = [64, 128, 256] +# itopk_size = [64, 128, 256, 512] +# mxelem = [64, 128, 256] + +pq_bits = [8] +subspace_dims = [2, 4] + +# rblock = [(256, 4), (512, 2), (1024, 1)] +# rcandidates = [32] +# rsize = [256, 512] +code_book_types = ["half"] + +search_types = dict( + float_uint32=("float", "uint32_t", "float"), # data_t, idx_t, distance_t + half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", "float"), + half_uint64=("half", "uint64_t", "float"), +) + +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_single_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_kernel_selection(\n {team}, {mxdim}, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..d61ad0ce15 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..410d2377ec --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..60cd58bab9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..dfe5e6f14e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..9a5d862276 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..d92ab50a58 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..aac197d590 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..f38a10e6d0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..5523e63038 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..b06ef3d4fd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..1fddee0e06 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..2aee442186 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..7a15e85280 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..efba46c248 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..990582f18b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a55907c66f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..55fd749720 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..4b4063652a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..bae83dc0fa --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..99492db344 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..797142e317 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..9a36c35ae0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..e0a01e84cc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..14de1b8941 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..b1d50fb445 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..c189a91764 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..8693ee3716 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..216ffd1ec5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..36985d218b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..8d55fe2b09 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2fdb1cbc20 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..6dc3dc2ca8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..21f8633033 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..1a3867e06f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..9cbb16188a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..305a1754bc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..900e1b69d9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..a0bb2259f0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..09d36a39a0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..dc9cbb2b56 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c5508a38e2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..7024425155 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..68687bc9cf --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..60efc55a30 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..b2dfaac5fe --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..891e9ef7cc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..91e617204c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a01d497676 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh new file mode 100644 index 0000000000..179bf8f20f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 6f8766c86b..6f023c39f1 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,45 +37,14 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { """ trailer = """ -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search """ @@ -103,7 +71,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 
1a3b2284bd..0e28d7a876 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 36e86d9ed6..5e5e80a5de 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 6f1af2d93f..9039496968 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 1279f8e415..fe1c7e77e5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 0dabff0df5..7ef36baf7d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, float, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 72bb74cdb8..da51c16314 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace 
raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index dceea10b5d..99a4f7feb7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index acb8bd6a12..50cdc97dd7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu index fa89bca45f..b2d9cdb600 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 
2023-2024, NVIDIA CORPORATION. * @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu index 645ca61ff5..d756b295b7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu index 41b6f9b420..b1e998762c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu index 38f0ac3b04..e712de6390 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu index c462a9d359..282de4a851 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu index f5b2874e20..71ef968575 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu index 0b01428b86..7c88406d71 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu index 70228a129d..360635dddb 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 0254f09ff0..3f129bd7cf 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index 2b67e7e968..053b73275e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, 
NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 17d6722e58..a1bb20369a 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 38f02812e2..dbbc8bdd21 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index fa111196c6..125499e319 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff 
--git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 1ef3c28aa3..f2117c4f80 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); 
} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index d26cb44843..8e5ba0f98f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 4d4322f261..bea7d25392 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, uint8_t, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh new file mode 100644 index 0000000000..7fb705a2d2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 1515f43134..0e809e4dc3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,46 +37,14 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::single_cta_search { """ trailer = """ -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search """ @@ -107,7 +74,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu 
b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index b8c23103ba..8a9fc408ee 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 8ab1897119..c6f7c90c69 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel 
+namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 9fd36b4cb9..2766286673 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include 
-instantiate_single_cta_select_and_run( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index a9ee2c864b..98ee189766 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index dadc574b65..c3ea39a729 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 30e043f47e..a53457656c 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 089e4c930f..52318efb85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 3e8ffb8bf8..6451fdc7f3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu index 29e7bfa250..e927fd0878 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu index a004f900d0..3f3d22ee08 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu index 549849b21d..a84e5b8bd7 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu index 3825f572f7..af4248865b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu index 31d83f443b..16bd0cb647 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu index 3493ab294c..afc59c8a59 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu index 6e09709994..147d31cf85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu index 4bc0158f7e..5624a71c3c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index 279587738e..761fb705ba 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index ef127d3f7d..84b76cba53 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 7fcfdcc28e..598fff9cdf 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index a6c606d99b..e7a1a9d9c6 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 0b8be56614..d40b9285fc 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, uint8_t, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index 4c193b9408..073bb350da 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index bdf16d2f03..29b0224b4d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 93624df4aa..d9601de2ad 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/refine_host_float_float.cpp b/cpp/src/neighbors/detail/refine_host_float_float.cpp index c596200c0a..09dcae9c3a 100644 --- a/cpp/src/neighbors/detail/refine_host_float_float.cpp +++ b/cpp/src/neighbors/detail/refine_host_float_float.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,5 +25,6 @@ distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); #undef instantiate_raft_neighbors_refine diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu index ea6892d2c5..75851eeedb 100644 --- a/cpp/src/neighbors/refine_float_float.cu +++ b/cpp/src/neighbors/refine_float_float.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,7 @@ #include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -34,17 +34,21 @@ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ - template void raft::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ + template void raft::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + raft::distance::DistanceType metric); + +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index ecb871fccc..20ed3bacc7 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -372,6 +372,8 @@ if(BUILD_TESTS) test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu test/neighbors/ann_cagra/test_float_int64_t.cu test/neighbors/ann_cagra/test_half_int64_t.cu + 
test/neighbors/ann_cagra_vpq/test_float_int64_t.cu + test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 175e4ef483..5cca6d561a 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,87 +21,133 @@ namespace raft::neighbors::cagra::detail { namespace multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_kernel_selection( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ 
+ SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection } // namespace multi_cta_search namespace 
single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 
1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail \ No newline at end of file +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh new file mode 100755 index 0000000000..503b1a413a --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "../test_utils.cuh"
+#include "ann_utils.cuh"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace {
+/**
+ * @brief Fill `dataset_ptr` and `query_ptr` with clustered synthetic points.
+ *
+ * `num_centers * dim` center coordinates are drawn from `dist` using a generator with a fixed
+ * seed (`mt(0)`). Every generated point picks one center with `i_dist` and adds
+ * `dist(mt) * 1e-1` to each of its coordinates. Points are staged in a single host buffer
+ * (reused for the dataset and then for the queries) and copied out with `raft::copy`.
+ *
+ * @param[out] dataset_ptr  destination for `dataset_size * dim` values
+ * @param[out] query_ptr    destination for `query_size * dim` values
+ * @param[in] dataset_size  number of dataset points to generate
+ * @param[in] query_size    number of query points to generate
+ * @param[in] dim           number of values per point
+ * @param[in] num_centers   number of cluster centers
+ * @param[in] cuda_stream   stream handed to `raft::copy`
+ *
+ * NOTE(review): this function does not synchronize `cuda_stream`, and the host staging buffer is
+ * overwritten / destroyed right after each `raft::copy` call - presumably the copy has already
+ * consumed the host data by then; confirm against `raft::copy`.
+ */
+template
+void GenerateDataset(T* const dataset_ptr,
+                     T* const query_ptr,
+                     const std::size_t dataset_size,
+                     const std::size_t query_size,
+                     const std::size_t dim,
+                     const std::size_t num_centers,
+                     cudaStream_t cuda_stream)
+{
+  auto center_list = raft::make_host_matrix(num_centers, dim);
+  auto host_dataset = raft::make_host_matrix(std::max(dataset_size, query_size), dim);
+
+  std::normal_distribution dist(0, 1);
+  std::mt19937 mt(0);
+  for (std::size_t i = 0; i < center_list.size(); i++) {
+    center_list.data_handle()[i] = dist(mt);
+  }
+
+  std::uniform_int_distribution i_dist(0, num_centers - 1);
+  for (std::size_t i = 0; i < dataset_size; i++) {
+    const auto center_index = i_dist(mt);
+    for (std::size_t j = 0; j < dim; j++) {
+      // Centers are laid out like the points (`dim` consecutive values per row), so the row of
+      // center c starts at c * dim. Indexing with `c + j` would make neighbouring centers
+      // overlapping windows of the same values instead of independent rows.
+      host_dataset.data_handle()[i * dim + j] =
+        center_list.data_handle()[center_index * dim + j] + dist(mt) * 1e-1;
+    }
+  }
+  raft::copy(dataset_ptr, host_dataset.data_handle(), dataset_size * dim, cuda_stream);
+
+  // Queries are generated the same way, reusing the staging buffer.
+  for (std::size_t i = 0; i < query_size; i++) {
+    const auto center_index = i_dist(mt);
+    for (std::size_t j = 0; j < dim; j++) {
+      host_dataset.data_handle()[i * dim + j] =
+        center_list.data_handle()[center_index * dim + j] + dist(mt) * 1e-1;
+    }
+  }
+  raft::copy(query_ptr, host_dataset.data_handle(), query_size * dim, cuda_stream);
+}
+}  // namespace
+
+namespace raft::neighbors::cagra {
+/** Parameters of a single AnnCagraVpqTest case (one entry of the test parameter list). */
+struct AnnCagraVpqInputs {
+  int n_queries;
+  int n_rows;
+  int dim;
+  int k;
+  int pq_len;
+  int pq_bits;
+  graph_build_algo build_algo;
+  search_algo algo;
+  int max_queries;
+  int team_size;
+  int itopk_size;
+  int search_width;
+  raft::distance::DistanceType metric;
+  bool host_dataset;
+  bool include_serialized_dataset;
+  // std::optional
+  double min_recall;  // = std::nullopt;
+};
+
+/**
+ * Print the test parameters in a readable form. `algo` and `build_algo` are printed by name via
+ * lookup tables indexed with the enum's integer value (`at()` throws if the value is out of range).
+ */
+inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p)
+{
+  std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"};
+  std::vector build_algo = {"IVF_PQ", "NN_DESCENT"};
+  os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim
+     << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", pq_len=" << p.pq_len << ", "
+     << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size
+     << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric)
+     << (p.host_dataset ? ", host" : ", device")
+     << ", build_algo=" << build_algo.at((int)p.build_algo) << '}' << '\n';
+  return os;
+}
+
+/**
+ * Parameterized fixture for CAGRA search on a VPQ-compressed index.
+ *
+ * testCagra() computes reference neighbors with naive_knn, builds an index with
+ * `index_params.compression` set, round-trips it through serialize/deserialize, searches for
+ * `k * 16` candidates per query, refines them to `k` on the host and checks the result with
+ * eval_neighbours / eval_distances.
+ */
+template
+class AnnCagraVpqTest : public ::testing::TestWithParam {
+ public:
+  AnnCagraVpqTest()
+    : stream_(resource::get_cuda_stream(handle_)),
+      ps(::testing::TestWithParam::GetParam()),
+      database(0, stream_),
+      search_queries(0, stream_)
+  {
+  }
+
+ protected:
+  void testCagra()
+  {
+    size_t queries_size = ps.n_queries * ps.k;
+    std::vector indices_Cagra(queries_size);
+    std::vector indices_naive(queries_size);
+    std::vector distances_Cagra(queries_size);
+    std::vector distances_naive(queries_size);
+
+    // Reference result: brute-force k-NN over the generated data, copied back to the host.
+    {
+      rmm::device_uvector distances_naive_dev(queries_size, stream_);
+      rmm::device_uvector indices_naive_dev(queries_size, stream_);
+      naive_knn(handle_,
+                distances_naive_dev.data(),
+                indices_naive_dev.data(),
+                search_queries.data(),
+                database.data(),
+                ps.n_queries,
+                ps.n_rows,
+                ps.dim,
+                ps.k,
+                ps.metric);
+      update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
+      update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
+      resource::sync_stream(handle_);
+    }
+
+    // The search over-fetches 16x the requested neighbors; the candidates are refined down to k.
+    const auto vpq_k = ps.k * 16;
+    {
+      rmm::device_uvector distances_dev(vpq_k * ps.n_queries, stream_);
+      rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_);
+
+      {
+        if ((ps.dim % ps.pq_len) != 0) {
+          // TODO: remove this requirement in the algorithm.
+          GTEST_SKIP() << "(TODO) At the moment dim, (" << ps.dim
+                       << ") must be a multiple of pq_len (" << ps.pq_len << ")";
+        }
+        cagra::index_params index_params;
+        index_params.compression = vpq_params{.pq_bits = static_cast(ps.pq_bits),
+                                              .pq_dim = static_cast(ps.dim / ps.pq_len)};
+        index_params.metric = ps.metric;  // Note: currently only the cagra::index_params metric is
+                                          // not used for knn_graph building.
+        index_params.build_algo = ps.build_algo;
+        // Forward every search knob carried by the test inputs; `search_width` is part of
+        // AnnCagraVpqInputs and of the parameter grid, so it has to reach the search as well.
+        cagra::search_params search_params;
+        search_params.algo = ps.algo;
+        search_params.max_queries = ps.max_queries;
+        search_params.team_size = ps.team_size;
+        search_params.itopk_size = ps.itopk_size;
+        search_params.search_width = ps.search_width;
+
+        auto database_view =
+          raft::make_device_matrix_view(database.data(), ps.n_rows, ps.dim);
+
+        // Build from a host or device copy of the data, then write the index to "cagra_index".
+        {
+          cagra::index index(handle_);
+          if (ps.host_dataset) {
+            auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim);
+            raft::copy(database_host.data_handle(), database.data(), database.size(), stream_);
+            auto database_host_view = raft::make_host_matrix_view(
+              database_host.data_handle(), ps.n_rows, ps.dim);
+            index = cagra::build(handle_, index_params, database_host_view);
+          } else {
+            index = cagra::build(handle_, index_params, database_view);
+          };
+          cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset);
+        }
+
+        // Search runs on the deserialized index; if the dataset was left out of the file it is
+        // re-attached from the device buffer.
+        auto index = cagra::deserialize(handle_, "cagra_index");
+        if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); }
+
+        // CAGRA-Q sanity check: we've built the right index type
+        auto* vpq_dataset =
+          dynamic_cast*>(&index.data());
+        EXPECT_NE(vpq_dataset, nullptr)
+          << "Expected VPQ dataset, because we're testing CAGRA-Q here.";
+
+        auto search_queries_view = raft::make_device_matrix_view(
+          search_queries.data(), ps.n_queries, ps.dim);
+        auto indices_out_view =
+          raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, vpq_k);
+        auto dists_out_view = raft::make_device_matrix_view(
+          distances_dev.data(), ps.n_queries, vpq_k);
+
+        cagra::search(
+          handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view);
+
+        // Refine the vpq_k candidates per query down to k on the host, using host copies of the
+        // `database` and query buffers.
+        {
+          auto host_dataset = raft::make_host_matrix(ps.n_rows, ps.dim);
+          raft::copy(
+            host_dataset.data_handle(), (const DataT*)database.data(), ps.n_rows * ps.dim, stream_);
+
+          auto host_queries = raft::make_host_matrix(ps.n_queries, ps.dim);
+          raft::copy(host_queries.data_handle(),
+                     (const DataT*)search_queries_view.data_handle(),
+                     ps.n_queries * ps.dim,
+                     stream_);
+
+          auto host_index_candidate = raft::make_host_matrix(ps.n_queries, vpq_k);
+          raft::copy(host_index_candidate.data_handle(),
+                     indices_out_view.data_handle(),
+                     ps.n_queries * vpq_k,
+                     stream_);
+
+          // The refined output is written straight into the host result vectors.
+          auto host_indices_Cagra_view =
+            raft::make_host_matrix_view(indices_Cagra.data(), ps.n_queries, ps.k);
+
+          auto host_dists_Cagra_view =
+            raft::make_host_matrix_view(distances_Cagra.data(), ps.n_queries, ps.k);
+
+          // The copies above must have finished before refine reads the host buffers.
+          resource::sync_stream(handle_);
+
+          raft::neighbors::refine(handle_,
+                                  raft::make_const_mdspan(host_dataset.view()),
+                                  raft::make_const_mdspan(host_queries.view()),
+                                  raft::make_const_mdspan(host_index_candidate.view()),
+                                  host_indices_Cagra_view,
+                                  host_dists_Cagra_view,
+                                  ps.metric);
+
+          // Copy the refined top-k back into the front of the device buffers, which
+          // eval_distances below reads with a row length of k.
+          raft::copy(indices_dev.data(),
+                     host_indices_Cagra_view.data_handle(),
+                     ps.k * ps.n_queries,
+                     stream_);
+          raft::copy(distances_dev.data(),
+                     host_dists_Cagra_view.data_handle(),
+                     ps.k * ps.n_queries,
+                     stream_);
+          resource::sync_stream(handle_);
+        }
+      }
+
+      double min_recall = ps.min_recall;
+      EXPECT_TRUE(eval_neighbours(indices_naive,
+                                  indices_Cagra,
+                                  distances_naive,
+                                  distances_Cagra,
+                                  ps.n_queries,
+                                  ps.k,
+                                  0.003,
+                                  min_recall));
+      EXPECT_TRUE(eval_distances(handle_,
+                                 database.data(),
+                                 search_queries.data(),
+                                 indices_dev.data(),
+                                 distances_dev.data(),
+                                 ps.n_rows,
+                                 ps.dim,
+                                 ps.n_queries,
+                                 ps.k,
+                                 ps.metric,
+                                 1.0e-4));
+    }
+  }
+
+  /** Size the device buffers and fill them with GenerateDataset, using sqrt(n_rows) centers. */
+  void SetUp() override
+  {
+    database.resize(((size_t)ps.n_rows) * ps.dim, stream_);
+    search_queries.resize(ps.n_queries * ps.dim, stream_);
+    GenerateDataset(database.data(),
+                    search_queries.data(),
+                    ps.n_rows,
+                    ps.n_queries,
+                    ps.dim,
+                    static_cast(std::sqrt(ps.n_rows)),
+                    stream_);
+    resource::sync_stream(handle_);
+  }
+
+  /** Wait for outstanding work on the stream, then shrink both device buffers to zero. */
+  void TearDown() override
+  {
+    resource::sync_stream(handle_);
+    database.resize(0, stream_);
+    search_queries.resize(0, stream_);
+  }
+
+ private:
+  raft::resources handle_;
+  rmm::cuda_stream_view stream_;
+  AnnCagraVpqInputs ps;
+  rmm::device_uvector database;
+  rmm::device_uvector search_queries;
+};
+
+// Parameter grid shared by the VPQ test translation units; each row below lists the values for
+// the AnnCagraVpqInputs field named in its trailing comment.
+const std::vector vpq_inputs = raft::util::itertools::product(
+  {100},                                              // n_queries
+  {1000, 10000},                                      // n_rows
+  {128, 132, 192, 256, 512, 768},                     // dim
+  {8, 12},                                            // k
+  {2},                                                // pq_len
+  {8},                                                // pq_bits
+  {graph_build_algo::NN_DESCENT},                     // build_algo
+  {search_algo::SINGLE_CTA, search_algo::MULTI_CTA},  // algo
+  {0},                                                // max_queries
+  {0},                                                // team_size
+  {512},                                              // itopk_size
+  {1},                                                // search_width
+  {raft::distance::DistanceType::L2Expanded},         // metric
+  {false},                                            // host_dataset
+  {true},                                             // include_serialized_dataset
+  {0.8}                                               // min_recall
+);
+
+}  // namespace raft::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu
new file mode 100644
index 0000000000..f60edb5ed6
--- /dev/null
+++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): undefining this before the include presumably lets the templates used by this
+// test be instantiated in this translation unit instead of relying on precompiled
+// instantiations - confirm; the uint32_t variant of this test does not do it.
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "../ann_cagra_vpq.cuh"
+
+#include
+
+namespace raft::neighbors::cagra {
+
+// Fixture alias for this file's type combination (float / int64_t, going by the file name).
+typedef AnnCagraVpqTest AnnCagraVpqTestF_I64;
+// Each parameterized case simply runs the fixture's testCagra().
+TEST_P(AnnCagraVpqTestF_I64, AnnCagraVpq) { this->testCagra(); }
+
+// Run the test once for every entry of vpq_inputs.
+INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(vpq_inputs));
+
+}  // namespace raft::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu
new file mode 100644
index 0000000000..19d3f32250
--- /dev/null
+++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../ann_cagra_vpq.cuh"
+
+#include
+
+namespace raft::neighbors::cagra {
+
+// Fixture alias for this file's type combination (float / uint32_t, going by the file name).
+typedef AnnCagraVpqTest AnnCagraVpqTestF_U32;
+// Each parameterized case simply runs the fixture's testCagra().
+TEST_P(AnnCagraVpqTestF_U32, AnnCagraVpq) { this->testCagra(); }
+
+// Run the test once for every entry of vpq_inputs.
+INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(vpq_inputs));
+
+}  // namespace raft::neighbors::cagra
diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh
index 6be2ac7fc7..3e0bead665 100644
--- a/cpp/test/neighbors/ann_utils.cuh
+++ b/cpp/test/neighbors/ann_utils.cuh
@@ -300,7 +300,7 @@ auto eval_distances(raft::resources const& handle,
     raft::matrix::copy_rows(
       handle,
-      make_device_matrix_view(x, k, n_cols),
+      make_device_matrix_view(x, n_rows, n_cols),
       y.view(),
       make_device_vector_view(neighbors + i * k, k));