-
Notifications
You must be signed in to change notification settings - Fork 197
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add explicit instantiations for IVF-PQ search kernels used in tests (#…
…2212) Compilation of IVF-PQ search kernels can be time-consuming. In `libraft.so` the compilation is done in parallel for kernels without filtering and with `int64_t` index type. We have tests with `uint32_t` index type as well as tests for `bitset_filter` with both 32 and 64 bit index types. This PR adds explicit template instantiations for the tests. This way we avoid repeated compilation of the kernels with filter, and this also enables parallel compilation of the `compute_similarity` kernel for different template types. The kernels with these additional type parameters are not added to `libraft.so`, only linked together with the test executable. Note that this PR does not increase the number of compiled kernels, but it enables compiling them in parallel. Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Ben Frederickson (https://github.com/benfred) URL: #2212
- Loading branch information
Showing
45 changed files
with
1,245 additions
and
486 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity_template.cuh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
|
||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/* | ||
* NOTE: this file is to be used in source files generated by | ||
* src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh> | ||
#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh> | ||
#include <raft/neighbors/sample_filter.cuh> | ||
|
||
// Emits explicit template instantiations for one (OutT, LutT, IvfSampleFilterT) | ||
// combination of the IVF-PQ similarity entry points: | ||
//   1. compute_similarity_select<OutT, LutT, IvfSampleFilterT>, which returns | ||
//      selected<OutT, LutT, IvfSampleFilterT>; | ||
//   2. compute_similarity_run<OutT, LutT, IvfSampleFilterT>, which returns void and | ||
//      takes that `selected` value as its first argument. | ||
// | ||
// Macro parameters (named after the template parameters they are substituted for): | ||
//   OutT             - element type of the `_out_scores` pointer. | ||
//   LutT             - element type of the `lut_scores` pointer. | ||
//   IvfSampleFilterT - type of the `sample_filter` argument, passed by value. | ||
// | ||
// The parameter lists below are spelled out in full; they presumably have to match | ||
// the declarations in ivf_pq_compute_similarity-inl.cuh exactly - confirm against | ||
// that header whenever either signature changes. | ||
// | ||
// NOTE(review): an argument that itself contains a top-level comma (for example a | ||
// filter type with several template arguments) would be split by the preprocessor; | ||
// the COMMA macro defined after this one looks intended for that case - verify at | ||
// the call sites. | ||
#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ | ||
OutT, LutT, IvfSampleFilterT) \ | ||
template auto \ | ||
raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, IvfSampleFilterT>( \ | ||
const cudaDeviceProp& dev_props, \ | ||
bool manage_local_topk, \ | ||
int locality_hint, \ | ||
double preferred_shmem_carveout, \ | ||
uint32_t pq_bits, \ | ||
uint32_t pq_dim, \ | ||
uint32_t precomp_data_count, \ | ||
uint32_t n_queries, \ | ||
uint32_t n_probes, \ | ||
uint32_t topk) \ | ||
->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, IvfSampleFilterT>; \ | ||
\ | ||
template void \ | ||
raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, IvfSampleFilterT>( \ | ||
raft::neighbors::ivf_pq::detail::selected<OutT, LutT, IvfSampleFilterT> s, \ | ||
rmm::cuda_stream_view stream, \ | ||
uint32_t dim, \ | ||
uint32_t n_probes, \ | ||
uint32_t pq_dim, \ | ||
uint32_t n_queries, \ | ||
uint32_t queries_offset, \ | ||
raft::distance::DistanceType metric, \ | ||
raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ | ||
uint32_t topk, \ | ||
uint32_t max_samples, \ | ||
const float* cluster_centers, \ | ||
const float* pq_centers, \ | ||
const uint8_t* const* pq_dataset, \ | ||
const uint32_t* cluster_labels, \ | ||
const uint32_t* _chunk_indices, \ | ||
const float* queries, \ | ||
const uint32_t* index_list, \ | ||
float* query_kths, \ | ||
IvfSampleFilterT sample_filter, \ | ||
LutT* lut_scores, \ | ||
OutT* _out_scores, \ | ||
uint32_t* _out_indices); | ||
|
||
// Expands to a single comma token. Presumably used to pass a type containing a | ||
// top-level comma as one macro argument - its uses are not visible here, so confirm. | ||
#define COMMA , |
Oops, something went wrong.