Skip to content

Commit

Permalink
IVF-PQ: use device properties helper (#1035)
Browse files Browse the repository at this point in the history
Use the raft handle's lazy-loading helper `get_device_properties` instead of explicitly calling `cudaGetDeviceProperties` before every kernel launch; querying the device properties each time is a costly operation.

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: #1035
  • Loading branch information
achirkin authored Nov 18, 2022
1 parent f2b8ae0 commit d6df557
Showing 1 changed file with 4 additions and 8 deletions.
12 changes: 4 additions & 8 deletions cpp/include/raft/spatial/knn/detail/ivf_pq_search.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,8 @@ struct ivfpq_compute_similarity {
* beyond this limit, do not expect that increasing the number of active blocks per SM
* would improve locality any further.
*/
static inline auto select(bool manage_local_topk,
static inline auto select(const cudaDeviceProp& dev_props,
bool manage_local_topk,
int locality_hint,
double preferred_shmem_carveout,
uint32_t pq_bits,
Expand All @@ -1029,12 +1030,6 @@ struct ivfpq_compute_similarity {
uint32_t n_probes,
uint32_t topk) -> selected
{
cudaDeviceProp dev_props;
{
int cur_dev;
RAFT_CUDA_TRY(cudaGetDevice(&cur_dev));
RAFT_CUDA_TRY(cudaGetDeviceProperties(&dev_props, cur_dev));
}
// Shared memory for storing the lookup table
size_t lut_mem = sizeof(LutT) * (pq_dim << pq_bits);
// Shared memory for storing pre-computed pieces to speedup the lookup table construction
Expand Down Expand Up @@ -1364,7 +1359,8 @@ void ivfpq_search_worker(const handle_t& handle,
}

auto search_instance =
ivfpq_compute_similarity<IdxT, ScoreT, LutT>::select(manage_local_topk,
ivfpq_compute_similarity<IdxT, ScoreT, LutT>::select(handle.get_device_properties(),
manage_local_topk,
coresidency,
preferred_shmem_carveout,
index.pq_bits(),
Expand Down

0 comments on commit d6df557

Please sign in to comment.